
# braney 2010-12-13 get distribution mentioned in Felix's mail
wget "ftp://ftp.sanger.ac.uk/pub/gencode/release_5/gencode5_GRCh37.tgz"

# braney 2010-12-22 get new distribution with level 1 pseudogenes marked
# properly
# Stop if the data directory is missing, so the download and all of the
# table loads below can never run from whatever directory we happened to be in.
cd /hive/groups/encode/dcc/data/gencodeV5 || exit 1
wget "ftp://ftp.sanger.ac.uk/pub/gencode/release_5/gencode5_GRCh37.tgz"

# NOTE(review): no unpack step for the tarball is recorded here; gencode5/ is
# assumed to already exist at this point -- confirm.
cd gencode5 || exit 1

# Concatenate the four .classes files and load them as one table,
# wgEncodeGencodeClassesV5 in hg19, using the gencodeGeneClass.sql schema.
cat \
    gencode.v5.annotation.level_1_2.classes \
    gencode.v5.annotation.level_3.classes \
    gencode.v5.polyAs.classes \
    gencode.v5.2wayconspseudos.GRCh37.classes \
  | hgLoadSqlTab hg19 wgEncodeGencodeClassesV5 \
      ~/kent/src/hg/lib/gencodeGeneClass.sql stdin

# Level 3 annotation: convert the GTF to extended genePred, rewrite the first
# "ENSTR" on each output line to "ENST0", and keep the result as a .gp file.
gtfToGenePred -genePredExt -geneNameAsName2 \
    gencode.v5.annotation.level_3.gtf stdout \
  | sed 's/ENSTR/ENST0/' \
  > gencode.v5.annotation.level_3.gp

# Load that .gp file (fed on stdin) into hg19.wgEncodeGencodeAutoV5.
hgLoadGenePred -genePredExt hg19 wgEncodeGencodeAutoV5 stdin \
  < gencode.v5.annotation.level_3.gp

# 2-way consensus pseudogenes: relabel "transcript" features (column 3) as
# "exon" before handing the GTF to gtfToGenePred, then load the resulting
# genePred file into hg19.wgEncodeGencode2wayConsPseudoV5.
tawk '{ if ($3 == "transcript") { $3 = "exon" } print $0 }' \
    gencode.v5.2wayconspseudos.GRCh37.gtf \
  | gtfToGenePred -genePredExt stdin gencode.v5.2wayconspseudos.GRCh37.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencode2wayConsPseudoV5 \
    gencode.v5.2wayconspseudos.GRCh37.gp

# Level 1/2 annotation: same conversion as used for level 3 (GTF -> extended
# genePred, first "ENSTR" per line rewritten to "ENST0"), loaded into
# hg19.wgEncodeGencodeManualV5 directly from the .gp file.
gtfToGenePred -genePredExt -geneNameAsName2 \
    gencode.v5.annotation.level_1_2.gtf stdout \
  | sed 's/ENSTR/ENST0/' \
  > gencode.v5.annotation.level_1_2.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencodeManualV5 \
    gencode.v5.annotation.level_1_2.gp

# polyA features: relabel the three polyA feature types in column 3 as "exon"
# before converting the GTF to genePred, then load the result into
# hg19.wgEncodeGencodePolyaV5.
tawk '
$3 == "polyA_site" || $3 == "polyA_signal" || $3 == "pseudo_polyA" { $3 = "exon" }
{ print $0 }' gencode.v5.polyAs.gtf \
  | gtfToGenePred -genePredExt stdin gencode.v5.polyAs.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencodePolyaV5 \
    gencode.v5.polyAs.gp

# Everything below works on relative paths inside metadata/; stop if it is
# missing rather than writing the map and generated files somewhere else.
cd metadata || exit 1

# Lookup table, one row per metadata file:
#   <metadata file name> <schema .sql file> <destination table>
# The delimiter is quoted so the rows are always written literally.
cat > map << '_EOF_'
gencode_Exon_supporting_feature gencodeExonSupport.sql wgEncodeGencodeExonSupportV5
gencode_HGNC gencodeGeneSymbol.sql wgEncodeGencodeGeneSymbolV5
gencode_PDB gencodePdb.sql wgEncodeGencodePdbV5
gencode_Pubmed_id gencodePubMed.sql wgEncodeGencodePubmedV5
gencode_RefSeq gencodeRefSeq.sql wgEncodeGencodeRefSeqV5
gencode_Source gencodeSource.sql wgEncodeGencodeSourceV5
gencode_SwissProt gencodeUniProt.sql wgEncodeGencodeUniProtV5
gencode_Transcript_supporting_feature gencodeTranscriptSupport.sql wgEncodeGencodeTranscriptSupportV5
_EOF_

# gencodeMetaCommands MODE
#
# For every gencode* file in the current directory that has a row in ./map
# (columns: file name, schema .sql file, table name), print one command line:
#   MODE=load -> hgLoadSqlTab hg19 <table> <sql dir>/<schema> <file>
#   MODE=drop -> drop table <table>;
# A file is matched only when its name equals column 1 of a row exactly.
# Files without a row are skipped, so a failed lookup can neither repeat the
# previous file's command nor dump the shell's variables into the output
# (which is what a bare `set` with no arguments does).
# The body runs in a subshell, so the loop variable does not leak out.
gencodeMetaCommands() (
  for metaFile in gencode*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$metaFile" ] || continue
    awk -v name="$metaFile" -v mode="$1" \
        -v sqlDir=/hive/groups/encode/dcc/data/gencodeV5/sql '
      $1 != name { next }
      mode == "drop" { print "drop table " $3 ";"; next }
      { print "hgLoadSqlTab hg19 " $3 " " sqlDir "/" $2 " " $1 }
    ' map
  done
)

gencodeMetaCommands load > loadMetaJobs.txt
gencodeMetaCommands drop > dropMetaTableCommands.txt

# Run the generated load commands, echoing each one as it executes. The drop
# commands are only written to dropMetaTableCommands.txt; nothing here runs them.
sh -x  loadMetaJobs.txt

