#!/bin/bash
# hgLoadEnsembl - command-line driver that runs the Ensembl/VEGA download,
# import, export and load helper programs in sequence.

# Abort on the first failing command.  This is set here rather than on the
# "#!" line so that it still applies when the script is started as
# "bash hgLoadEnsembl ...", where interpreter flags on the "#!" line are
# ignored.
set -e

# Help text; printed to stderr when the wrong number of arguments is given.
usage="hgLoadEnsembl - Download ENSEMBL or VEGA genes from ftp site and create tracks in the browser.

usage:
    hgLoadEnsembl [options] organism version db
options:
      -k - create the knownToEnsembl table with hgMapToGene (ensGene to knownGene)
      -p - load pseudogene track
      -l liftfile - lift genePreds using this file (usually for contigs to
         random chroms)
      -f filterFile - drop ENSEMBL transcript ids that are in this file.
         Empty lines and those starting with # are ignored.
      -s superfamilyDb - create superfamily tables from specified database (use downloadSuperfamily to create it)
      -d - drop temporary mysql database after finishing

   for example:
      hgLoadEnsembl homo_sapiens core_45_36g  hg18
   If version starts with vega_, the VEGA tables are loaded.
   All output is in the current directory.
"
# ---------------------------------------------------------------------------
# Command-line parsing.
#
# Flags default to 0 (off).  Options that carry a value default to the
# sentinel string "NO", meaning "not supplied".
# ---------------------------------------------------------------------------
kg=0
dropdb=0
pseudo=0
liftfile=NO
filterFile=NO
superfamily=NO
# Extra options forwarded to ensemblExport.  This is a plain string that is
# word-split where it is expanded, so option values containing whitespace
# are not supported.
exportOpts=""

# The leading ':' selects getopts' silent mode so that we produce our own
# diagnostics: an unknown option yields '?' and a missing option argument
# yields ':', with the offending option letter in OPTARG in both cases.
# -l, -f and -s each take an argument, hence the trailing ':' on each.
while getopts ":kpdl:f:s:" theOption; do
    case "$theOption" in
        k ) kg=1;;
        d ) dropdb=1;;
        p ) pseudo=1
            exportOpts="${exportOpts} -p" ;;
        l ) liftfile=$OPTARG
            exportOpts="${exportOpts} -l $liftfile" ;;
        f ) filterFile=$OPTARG
            exportOpts="${exportOpts} -f $filterFile";;
        s ) superfamily=$OPTARG;;
        : ) echo "Option -$OPTARG requires an argument" >&2
            exit 1;;
        \? ) echo "Invalid option: -$OPTARG" >&2
             exit 1;;
        * ) echo "BUG: option $theOption not handled" >&2
             exit 1;;
    esac
done
# Discard the parsed options, leaving only the positional arguments.
shift $((OPTIND - 1))

# Exactly three positional arguments are required: organism version db.
if [[ $# -ne 3 ]]; then
    echo "Error: wrong number of args:
$usage" >&2
    exit 1
fi
# ---------------------------------------------------------------------------
# Main pipeline.
#
# Each stage drops a marker file in ./done when it completes, and a stage
# whose marker already exists is skipped, so an interrupted run can be
# restarted without repeating finished stages.
# ---------------------------------------------------------------------------
organism=$1
version=$2
db=$3

# ensDb is the database removed by -d; ensDir is the directory name handed
# to the download and import stages.
ensDb="ensembl_${organism}_${version}"
ensDir="${organism}_${version}"
echo "organism $organism db $db lift=$liftfile filter=$filterFile"
mkdir -p "$ensDir" done

# Trace every command from here on.
set -x
if [[ ! -f done/download.done ]]; then
    ensemblDownload "$organism" "$version" "$ensDir"
    touch done/download.done
fi
if [[ ! -f done/import.done ]]; then
    ensemblDbImport "$organism" "$version" "$ensDir"
    touch done/import.done
fi
if [[ ! -f done/export.done ]]; then
    # exportOpts is deliberately left unquoted: it is a space-separated
    # option string that must be split into separate words.
    # shellcheck disable=SC2086
    ensemblExport $exportOpts "$organism" "$version" "$db"
    touch done/export.done
fi
if [[ ! -f done/load.done ]]; then
    ensemblLoad "$organism" "$version" "$db"
    touch done/load.done
fi

# Optional stage: only when a superfamily database was named.  "NO" is the
# "not supplied" sentinel; an empty value is treated the same way so the
# helper is never called with an empty database name.
if [[ -n "$superfamily" && "$superfamily" != "NO" && ! -f done/superfamily.done ]]; then
    ensemblSuperfamily "$db" "$superfamily"
    touch done/superfamily.done
fi

# Optional stage: build the knownToEnsembl mapping table.
if [[ "$kg" == 1 && ! -f done/kgmap.done ]]; then
    hgMapToGene "$db" ensGene knownGene knownToEnsembl
    touch done/kgmap.done
fi

# Optional cleanup: drop the temporary database.  The pause gives the
# operator a chance to interrupt before the drop is issued.
if [[ "$dropdb" == 1 ]]; then
    echo "dropping database $ensDb in 5 seconds"
    sleep 5
    hgsql mysql -e "drop database if exists $ensDb"
    # NOTE(review): only load.done is cleared here; the other done/*.done
    # markers are left in place.  Confirm which stage creates $ensDb and
    # whether its marker should be cleared instead of (or as well as) this one.
    rm -f done/load.done
fi

echo done
exit 0
