#!/bin/bash
#
# ensemblDownload - download the MySQL table dumps of one Ensembl/Vega database.
#
# Arguments:
#   $1  species  - organism name as used in the FTP path (e.g. homo_sapiens)
#   $2  version  - database type and version suffix (e.g. core_45_36g)
#   $3  outdir   - directory to download into (created if missing)
#
# Steps:
#   1. Fetch <species>_<version>.sql.gz (the schema for all tables).
#   2. Extract the back-quoted names from its CREATE lines into tables.tmp.
#   3. Download <table>.txt.table.gz for every table whose name does not match
#      the exclusion pattern below.
#   4. Rewrite seq_region.txt.table.gz, replacing "GeneScaffold" with
#      "scaffold" (first occurrence on each line).
#
# Exits non-zero as soon as any command, or any stage of a pipeline, fails.

# Options are set here rather than on the shebang line so that they also apply
# when the script is started as "bash ensemblDownload ...".
set -euo pipefail

usage="ensemblDownload - download ensembl or vega mysql tables
usage:
    ensemblDownload species version outdir
for example: ensemblDownload homo_sapiens core_45_36g homo_sapiens_45_36g
"
if [[ $# -ne 3 ]]; then
    echo "Error: wrong number of args:
$usage" >&2
    exit 1
fi

organism=$1
version=$2
outdir=$3
input="ftp://ftp.ensembl.org/pub/current_${organism}/data/mysql/${organism}_${version}"
sql_file="${organism}_${version}.sql.gz"

mkdir -p -- "$outdir"
cd -- "$outdir"

# Get the sql definition for all tables.
wget --timestamping --no-verbose "$input/$sql_file"

# Get the list of tables, excluding big alignment tables and others.
# The table name is the text between the first pair of backticks on each
# CREATE line; tables.tmp keeps the unfiltered list.
zgrep CREATE "$sql_file" | awk -F'`' '{print $2}' > tables.tmp

# Extended regex matched anywhere in the name, so e.g. "dna" also excludes
# every table whose name merely contains "dna".
exceptions="density|ditag|dna|dna_align_feature|oligo|protein_align_feature|repeat_feature"

# grep exits non-zero when nothing survives the filter; report that instead of
# letting errexit terminate the script without any message.
tables=$(grep -v -E -- "$exceptions" tables.tmp) || {
    echo "Error: no tables left to download after filtering $sql_file" >&2
    exit 1
}

# Go get them! (--timestamping skips files that are already up to date.)
echo "Starting download to $outdir from $input. "
while IFS= read -r table; do
    # CREATE lines without a back-quoted name yield empty entries; skip them.
    [[ -n "$table" ]] || continue
    wget --timestamping --no-verbose "$input/${table}.txt.table.gz"
done <<< "$tables"

# Rename GeneScaffold to scaffold in the seq_region dump. With pipefail a
# failing zcat aborts here, before gzip -f can overwrite the downloaded file
# with truncated output.
zcat seq_region.txt.table.gz | sed -e 's/GeneScaffold/scaffold/' > seq_region.txt.table
gzip -f seq_region.txt.table
