# for emacs: -*- mode: sh; -*-

 
# This file describes building the browser database for the archaeal
# species Pyrococcus furiosus

# DOWNLOAD SEQUENCE FROM GENBANK (DONE 02/10/04)

    # On host eieo: create the assembly directory on store5 and expose it
    # under /cluster/data through a symlink.
    ssh eieo
    mkdir /cluster/store5/archae/pyrFur2
    ln -s /cluster/store5/archae/pyrFur2 /cluster/data/pyrFur2
    cd /cluster/data/pyrFur2
    # Fetch the NC_003413 FASTA record from NCBI and store it as chr1.fa.
    wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.fna
    mv NC_003413.fna chr1.fa
     # Edit header of chr1.fa to '> pyrAer1'
     # NOTE(review): 'pyrAer1' looks carried over from another assembly's doc;
     # this file builds pyrFur2 -- confirm which header was actually used.

# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 02/10/04)

    ssh hgwdev
    # Create the empty pyrFur2 database.
    echo 'create database pyrFur2' | hgsql ''
    cd /cluster/data/pyrFur2
    # hgNibSeq is given the database name, the nib directory and the FASTA file.
    hgNibSeq pyrFur2 /cluster/data/pyrFur2/nib chr1.fa
    # Save the faSize -detailed listing for chr1.fa as chrom.sizes.
    faSize -detailed chr1.fa > chrom.sizes
    # NOTE(review): only the /gbdb directory is created here; no command in
    # this log links nib/chr1.nib into it, although a later step removes
    # /gbdb/pyrFur2/nib/chr1.nib -- an ln -s step may be missing. Confirm.
    mkdir -p /gbdb/pyrFur2/nib
    # Create pyrFur2.grp as a copy of hg16.grp with NAME as primary key.
    echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
	    | hgsql pyrFur2
    # Register the assembly in the dbDb table of hgcentraltest on genome-testdb
    # (default position chr1:500000-550000, genome "Archae").
    echo 'INSERT INTO dbDb \
        (name, description, nibPath, organism, \
                defaultPos, active, orderKey, genome, scientificName, \
                htmlPath, hgNearOk) values \
        ("pyrFur2", "Pyrococcus furiosus", "/gbdb/pyrFur2/nib", "P. furiosus", \
               "chr1:500000-550000", 1, 85, "Archae", \
                "Pyrococcus furiosus", "/gbdb/pyrFur2/html/description.html", \
                0);' \
      | hgsql -h genome-testdb hgcentraltest

    # Add the new assembly to the trackDb source tree: a manual makefile edit,
    # then the archae/pyrFur2 directories are created and put under CVS.
    cd ~/kent/src/hg/makeDb/trackDb
    # edit the trackDb makefile

    # add the trackDb directories
    mkdir -p archae/pyrFur2
    cvs add archae
    cvs add archae/pyrFur2
    cvs commit

# GC PERCENT TRACK (DONE 02/10/04)

    ssh hgwdev
    mkdir -p /cluster/data/pyrFur2/bed/gcPercent
    cd /cluster/data/pyrFur2/bed/gcPercent
    # Create the table from gcPercent.sql, then run hgGcPercent with a
    # 2000-base window against the nib directory two levels up.
    hgsql pyrFur2 < ~/kent/src/hg/lib/gcPercent.sql
    hgGcPercent -win=2000 pyrFur2 ../../nib
    # edit ~/kent/src/hg/makeDb/trackDb/archae/trackDb.ra and add an entry for
    # GC percent for 2,000 base windows instead (simply cut and paste
    # ~/kent/src/hg/makeDb/trackDb/trackDb.ra).

# GC 20 BASE WIGGLE TRACK (DONE 8/25)

    mkdir /cluster/data/pyrFur2/bed/gc20Base
    cd /cluster/data/pyrFur2/bed/gc20Base
    mkdir wigData20 dataLimits20
    # hgGcPercent (20-base window, 19-base overlap, sequence "chr") writes to
    # stdout; only lines containing the word GC are kept. The awk step prints
    # "<$2+1><TAB><$5/10 to one decimal>" per line (its "bases" variable is
    # assigned but never used), and wigAsciiToBinary reads that on stdin,
    # writing its own stdout to dataLimits20/chr.
    hgGcPercent -chr=chr -file=stdout -win=20 -overlap=19 pyrFur2 ../../nib | grep -w GC | \
    awk '
{
    bases = $3 - $2
    perCent = $5/10.0
    printf "%d\t%.1f\n", $2+1, perCent
}' | wigAsciiToBinary -dataSpan=1 -chrom=chr \
	-wibFile=wigData20/gc20Base_1 -name=1 stdin > dataLimits20/chr
    # Load the .wig file(s) into table gc20Base and link the .wib file(s)
    # from the current directory into /gbdb/pyrFur2/wib.
    hgLoadWiggle pyrFur2 gc20Base wigData20/*.wig
    mkdir /gbdb/pyrFur2/wib
    ln -s `pwd`/wigData20/*.wib /gbdb/pyrFur2/wib

# GENBANK PROTEIN-CODING GENES (DONE 01/14/04)

    ssh hgwdev
    mkdir /cluster/data/pyrFur2/genbank
    cd /cluster/data/pyrFur2/genbank
    # Fetch the GenBank flat file for NC_003413 and rename it after the assembly.
    wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.gbk
    mv NC_003413.gbk pyrFur2.gbk
    # Create 3 files to assist parsing of the genbank
    # (each is a plain list with one entry per line, passed to gbArchaeGenome below)
    # 1. for a bed file
    echo 'chr1
start
end
locus_tag
1000
strand' > pyrFur2-params-bed.txt
    # 2. for the peptide parts
    echo 'locus_tag
translation' > pyrFur2-params-pep.txt
    # 3. for the other gene information
    echo 'locus_tag
product
note' > pyrFur2-params-xra.txt
    # Now extract the genes and information:
    # (arguments: GenBank file, parameter file, output file)
    gbArchaeGenome pyrFur2.gbk pyrFur2-params-bed.txt pyrFur2-genbank-cds.bed
    gbArchaeGenome pyrFur2.gbk pyrFur2-params-pep.txt pyrFur2-genbank-cds.pep
    gbArchaeGenome pyrFur2.gbk pyrFur2-params-xra.txt pyrFur2-genbank-cds.xra
    # Load the bed file as table gbProtCode.
    hgLoadBed pyrFur2 gbProtCode pyrFur2-genbank-cds.bed
    # Create pepPred and minGeneInfo from their .sql files, rename them to
    # gbProtCodePep / gbProtCodeXra, then bulk-load the .pep and .xra files.
    hgsql pyrFur2 < ~/kent/src/hg/lib/pepPred.sql
    hgsql pyrFur2 < ~/kent/src/hg/lib/minGeneInfo.sql
    echo rename table pepPred to gbProtCodePep | hgsql pyrFur2
    echo rename table minGeneInfo to gbProtCodeXra | hgsql pyrFur2
    echo load data local infile \'pyrFur2-genbank-cds.pep\' into table gbProtCodePep | hgsql pyrFur2
    echo load data local infile \'pyrFur2-genbank-cds.xra\' into table gbProtCodeXra | hgsql pyrFur2

# TIGR GENES (DONE 02/10/04)
    # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
    # and fill out the web form as follows:
    #   - Pick "Retrieve attributes for the specified DNA feature within a specific 
    #     organism and/or a specific role category".
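    #       * Pick the Pyrococcus furiosus entry (this step previously named
    #         "Pyrobaculum aerophilum IM2", apparently carried over from another
    #         genome's doc) and "Primary and TIGR annotation ORFs"
    #         from the 1st and 3rd box.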
    #       * Select everything from "Choose TIGR Annotation Gene Attributes"
    #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
    #       * Select everything from "Choose Other Gene Attributes"
    #   - Click submit, and click save as tab-delimited file.
    ssh hgwdev
    mkdir /cluster/data/pyrFur2/bed/tigrCmrORFs
    # pyrFur2-tigr.tab is presumably the tab-delimited file saved from the
    # web form described above; copy it into the track directory.
    cp pyrFur2-tigr.tab /cluster/data/pyrFur2/bed/tigrCmrORFs
    cd /cluster/data/pyrFur2/bed/tigrCmrORFs
    # Convert the TIGR table to bed and load it with the tigrCmrGene schema.
    tigrCmrToBed pyrFur2-tigr.tab pyrFur2-tigr.bed
    # ${HOME} rather than "~": a tilde is only expanded at the start of a
    # word, so "-sqlTable=~/..." would hand hgLoadBed a literal "~" path.
    hgLoadBed -tab pyrFur2 tigrCmrGene pyrFur2-tigr.bed -sqlTable="${HOME}/kent/src/hg/lib/tigrCmrGene.sql"
    # The track table is named tigrCmrORFs, so rename after loading.
    echo RENAME TABLE tigrCmrGene to tigrCmrORFs | hgsql pyrFur2

# Lowe Lab Microarrays (DONE 02/10/04)
    ssh hgwdev
    mkdir /cluster/data/pyrFur2/bed/llaPfuPrintA
    # Copy Pfu-arrays.bed and Pfu-arrays.exps into the track directory.
    cp Pfu-arrays.{bed,exps} /cluster/data/pyrFur2/bed/llaPfuPrintA
    cd /cluster/data/pyrFur2/bed/llaPfuPrintA
    hgLoadBed pyrFur2 llaPfuPrintA Pfu-arrays.bed
    # Create expRecord from its .sql file, rename it to llaPfuPrintAExps and
    # bulk-load Pfu-arrays.exps into it.
    hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
    echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql pyrFur2
    echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql pyrFur2
    # for now I'm using both hgFixed and the pyrFur2 database for this... that will change.
    # (the same three steps, repeated against hgFixed)
    hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
    echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql hgFixed
    echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql hgFixed

# CHANGE "chr1" to "chr"

    # Host corrected from "hgdev" to hgwdev, the machine used by every other
    # step in this doc.
    ssh hgwdev
    # Rename the nib file, drop the old /gbdb link and create one under the
    # new name.
    mv /cluster/data/pyrFur2/nib/chr1.nib /cluster/data/pyrFur2/nib/chr.nib
    rm /gbdb/pyrFur2/nib/chr1.nib
    ln -s /cluster/data/pyrFur2/nib/chr.nib /gbdb/pyrFur2/nib/chr.nib
    # a quick script to replace chr1 with chr

#!/bin/bash
# changeCh.sh - rewrite every "chr1" as "chr", in place, in each file given.
#
# Usage: changeCh.sh FILE...
#
# Every argument is processed, so the script behaves correctly when xargs
# hands it a whole batch of paths (reading only $1 would silently skip the
# rest). Each file is filtered through a private mktemp scratch file rather
# than a fixed name in /tmp, and the result is copied back over the original
# so the file keeps its existing permissions. A file whose sed pass fails is
# left untouched.
#
# Returns: 0 if every file was rewritten, 1 if any file failed.
changeCh() {
  local bedFile scratch status=0
  for bedFile in "$@"; do
    scratch=$(mktemp) || return 1
    if sed -e 's/chr1/chr/g' -- "$bedFile" > "$scratch"; then
      cp -- "$scratch" "$bedFile" || status=1
    else
      status=1
    fi
    rm -f -- "$scratch"
  done
  return "$status"
}

changeCh "$@"

    cd /cluster/data/pyrFur2/bed 
    # Run the changeCh.sh script shown above on every .bed file below bed/.
    # NOTE(review): assumes the script was saved as an executable changeCh.sh
    # on PATH -- this log does not show that step.
    find -name '*.bed' | xargs changeCh.sh
    # Now change the DB
    # Dump the whole database, replace every "chr1" in the dump text with
    # "chr", then drop, recreate and reload the database from the edited dump.
    cd /tmp
    hgsqldump pyrFur2 | sed 's/chr1/chr/g' > pfuNew.sql
    echo drop database pyrFur2 | hgsql
    echo create database pyrFur2 | hgsql
    hgsql pyrFur2 < pfuNew.sql
    rm pfuNew.sql
    # Point the default browser position at the renamed sequence.
    echo 'update dbDb set defaultPos="chr:550000-580000" where name="pyrFur2"' | hgsql -h genome-testdb hgcentraltest

# promoter track

    ssh hgwdev
    # Plus strand: encode plus.wig.gz, load it as table promoterScanPos with
    # the /gbdb path prefix, then link the .wib file(s) into that /gbdb path.
    mkdir -p /cluster/data/pyrFur2/wiggle/promoterScanPos
    cd /cluster/data/pyrFur2/wiggle/promoterScanPos    
    # NOTE(review): source location of plus.wig.gz is not recorded; as written
    # this copies the file onto itself.
    cp plus.wig.gz .
    wigAsciiToBinary -chrom=chr -wibFile=promoterScanPos plus.wig.gz
    hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanPos pyrFur2 promoterScanPos promoterScanPos.wig
    mkdir /gbdb/pyrFur2/wib/promoterScanPos
    ln -s `pwd`/*.wib  /gbdb/pyrFur2/wib/promoterScanPos/
    # Minus strand: same steps for minus.wig.gz / promoterScanNeg.
    # NOTE(review): this directory sits directly under /cluster/data/pyrFur2,
    # not under wiggle/ like promoterScanPos -- confirm whether that was intended.
    mkdir /cluster/data/pyrFur2/promoterScanNeg
    cd /cluster/data/pyrFur2/promoterScanNeg    
    cp minus.wig.gz .
    wigAsciiToBinary -chrom=chr -wibFile=promoterScanNeg minus.wig.gz
    hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanNeg pyrFur2 promoterScanNeg promoterScanNeg.wig
    mkdir /gbdb/pyrFur2/wib/promoterScanNeg
    ln -s `pwd`/*.wib  /gbdb/pyrFur2/wib/promoterScanNeg/

# shine DG track

    # One directory per strand under wiggle/; each input is encoded with
    # wigAsciiToBinary and the resulting .wig is loaded as its own table.
    cd /cluster/data/pyrFur2/wiggle
    mkdir shineDGPos
    mkdir shineDGNeg
    cd shineDGPos
    # NOTE(review): source location of shine.pos.gz / shine.neg.gz is not
    # recorded in this log.
    cp shine.pos.gz .
    wigAsciiToBinary -chrom=chr -wibFile=shineDGPos shine.pos.gz
    hgLoadWiggle pyrFur2 shineDGPos shineDGPos.wig
    cd ../shineDGNeg
    cp shine.neg.gz .
    wigAsciiToBinary -chrom=chr -wibFile=shineDGNeg shine.neg.gz
    hgLoadWiggle pyrFur2 shineDGNeg shineDGNeg.wig
    # Link both .wib files directly into /gbdb/pyrFur2/wib.
    cd /gbdb/pyrFur2/wib  
    ln -s /cluster/data/pyrFur2/wiggle/shineDGPos/shineDGPos.wib shineDGPos.wib
    ln -s /cluster/data/pyrFur2/wiggle/shineDGNeg/shineDGNeg.wib shineDGNeg.wib
    

# Rfam TRACK

   # Create bed/Rfam and load pfu.rfam.bed as table Rfam.
   cd /cluster/data/pyrFur2/bed
   mkdir Rfam
   cd Rfam/
   # file
   # NOTE(review): source location of pfu.rfam.bed is not recorded here.
   cp pfu.rfam.bed .
   hgLoadBed pyrFur2 Rfam pfu.rfam.bed
   # trackDb entry:
track Rfam
shortLabel Rfam
longLabel Rfam noncoding RNA
group genes
priority 2.3
visibility pack
color 43,127,60
type bed 6 .

# Heatshock array track

   cd /cluster/data/pyrFur2/bed
   mkdir llaPfuPrintC
   cd llaPfuPrintC/
   # NOTE(review): source location of Pfu-hs.bed / Pfu-hs.other is not recorded.
   cp Pfu-hs.bed .
   cp Pfu-hs.other .
   hgLoadBed pyrFur2 llaPfuPrintC Pfu-hs.bed
   # Create expRecord in both pyrFur2 and hgFixed, rename each copy to
   # llaPfuPrintCExps, then bulk-load Pfu-hs.other into both.
   hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
   echo 'rename table expRecord to llaPfuPrintCExps' | hgsql pyrFur2
   hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
   echo 'rename table expRecord to llaPfuPrintCExps' | hgsql hgFixed
   echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql pyrFur2
   echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql hgFixed
   # trackDb entry:
track llaPfuPrintC
shortLabel HS Microarray
longLabel Heat Shock full-genome P. furiosus microarray experiments
group regulation
priority 86.0
visibility dense
type expRatio
expScale 4.0
expStep 0.5
expTable llaPfuPrintCExps
chip printA
canPack off

# Matt's Operons

# Load matt.bed as table llOperons.
# NOTE(review): no mkdir for bed/llOperons appears in this log, and the source
# location of matt.bed is not recorded.
cd /cluster/data/pyrFur2/bed/llOperons
cp matt.bed .
hgLoadBed pyrFur2 llOperons matt.bed

# RNA genes

# Load pfuAllRNA.bed as table rnaGenes using a local copy of rnaGenes.sql.
# NOTE(review): rnaGenes/ is created but there is no "cd rnaGenes", so as
# written the files land in bed/ itself -- confirm whether a cd was omitted.
cd /cluster/data/pyrFur2/bed
mkdir rnaGenes
cp pfuAllRNA.bed .
cp ~/kent/src/hg/lib/rnaGenes.sql .
hgLoadBed -sqlTable=rnaGenes.sql pyrFur2 rnaGenes pfuAllRNA.bed

# EasyGene

cd /cluster/data/pyrFur2/bed
mkdir easyGene
cd easyGene
# Download the EasyGene predictions as GFF (the URL requests id=129, both
# strands, r_cutoff=2) and save them as easyGene.gff.
wget -O easyGene.gff "http://www.binf.ku.dk/cgi-bin/easygene/output?id=129&format=gff&desc=CDS&desc=ORF&desc=ALT&r_cutoff=2&start=&stop=&strand=plus&strand=minus&per_page=-1&250="
# Convert GFF to bed, then load with a local copy of easyGene.sql.
pernilleGffToBed easyGene.gff easyGene.bed
cp ~/kent/src/hg/lib/easyGene.sql .
hgLoadBed -sqlTable=easyGene.sql pyrFur2 easyGene easyGene.bed

# MULTIZ with P. horikoshii (pyrHor1), P. abyssi (pyrAby1), T. kodakarensis (therKoda1)
# This is redo of an earlier run, so some files are already in place
# DONE (10/8/05), kpollard

    cd /cluster/data/pyrFur2/bed/conservation
    mv Thermokoda.chr therKoda1.chr
    #edit headers to be pyrFur2, pyrHor1, pyrAby1, therKoda1
    # Build a nib file per genome and one chrom.sizes covering all four.
    /cluster/bin/i386/faToNib pyrFur2.chr pyrFur2.chr.nib
    /cluster/bin/i386/faToNib pyrHor1.chr pyrHor1.chr.nib
    /cluster/bin/i386/faToNib pyrAby1.chr pyrAby1.chr.nib
    /cluster/bin/i386/faToNib therKoda1.chr therKoda1.chr.nib
    faSize -detailed *.chr > chrom.sizes
    #blastz
    # pyrFur2 is the first sequence in every pairwise run; all runs use the
    # HoxD55.q scoring file.
    blastz pyrFur2.chr pyrAby1.chr Q=HoxD55.q > pyrFur2-pyrAby1.lav
    blastz pyrFur2.chr pyrHor1.chr Q=HoxD55.q > pyrFur2-pyrHor1.lav
    blastz pyrFur2.chr therKoda1.chr Q=HoxD55.q > pyrFur2-therKoda1.lav
    #lavToAxt
    /cluster/bin/i386/lavToAxt pyrFur2-pyrAby1.lav . . pyrFur2-pyrAby1.axt
    /cluster/bin/i386/lavToAxt pyrFur2-pyrHor1.lav . . pyrFur2-pyrHor1.axt
    /cluster/bin/i386/lavToAxt pyrFur2-therKoda1.lav . . pyrFur2-therKoda1.axt
    #axtBest
    # Same -winSize=500 -minScore=5000 settings for all three pairs.
    axtBest pyrFur2-pyrAby1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrAby1-best.axt
    axtBest pyrFur2-pyrHor1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrHor1-best.axt
    axtBest pyrFur2-therKoda1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-therKoda1-best.axt
    #axtToMaf
    # The combined chrom.sizes is passed for both size arguments.
    axtToMaf pyrFur2-pyrAby1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrAby1.maf
     axtToMaf pyrFur2-pyrHor1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrHor1.maf
     axtToMaf pyrFur2-therKoda1-best.axt chrom.sizes chrom.sizes pyrFur2-therKoda1.maf
    #multiz
    #delete extra header lines from maf files
    # First merge the pyrAby1 and pyrHor1 pairwise mafs, then merge that
    # result with the therKoda1 pairwise maf.
    multiz pyrFur2-pyrAby1.maf pyrFur2-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1.maf
    multiz pyrFur2-therKoda1.maf pyrFur2-pyrAby1-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1-therKoda1.maf
    #phyloHMM
    # Convert the 4-way maf to SS format (pyrFur2.ss), then fit a model with
    # the given tree topology; the later commands read phyloFit.mod.
    /cluster/bin/phast/msa_view -i MAF -M pyrFur2.chr -o SS pyrFur2-pyrAby1-pyrHor1-therKoda1.maf > pyrFur2.ss
    /cluster/bin/phast/phyloFit -i SS pyrFur2.ss -t "((pyrFur2,(pyrAby1,pyrHor1)),therKoda1)" 
    /cluster/bin/phast/msa_view -i SS pyrFur2.ss --summary-only
    # First phastCons run: --estimate-trees with prefix pyr-tree and no
    # posterior probabilities; the second run reads pyr-tree.cons.mod and
    # pyr-tree.noncons.mod.
    # NOTE(review): --gc 0.4496 presumably comes from the msa_view summary
    # above -- confirm.
    /cluster/bin/phast/phastCons pyrFur2.ss phyloFit.mod --gc 0.4496 \
	--target-coverage 0.7 --estimate-trees pyr-tree \
	--expected-lengths 25 --no-post-probs --ignore-missing \
	--nrates 1,1
    # Second phastCons run: writes the element predictions to
    # pyrFur2-elements.bed (--viterbi) and its stdout to cons.dat.
    /cluster/bin/phast/phastCons pyrFur2.ss \
	pyr-tree.cons.mod,pyr-tree.noncons.mod \
	--target-coverage 0.7 --expected-lengths 25 \
	--viterbi pyrFur2-elements.bed --score \
	--require-informative 0 --seqname chr > cons.dat
    # Encode cons.dat as a wig/wib pair and draw the fitted tree as PostScript.
    wigEncode cons.dat phastCons.wig phastCons.wib
    draw_tree phyloFit.mod > pyr-tree.ps
    #make ai and jpg files in Illustrator
    # Publish the tree image under the web server's images directory.
    cp pyr-tree.jpg /usr/local/apache/htdocs/images/
    #move data
    # phastCons wig/wib go into wib/, with the .wib linked into /gbdb.
    mkdir wib
    mv phastCons.wib wib/phastCons.wib
    mv phastCons.wig wib/phastCons.wig
    ln -s /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wib /gbdb/pyrFur2/wib
    # Each pairwise maf becomes otherSpp/<db>/chr.maf, and each directory is
    # linked into /gbdb/pyrFur2/pwMaf as <db>_pwMaf.
    mkdir -p /gbdb/pyrFur2/pwMaf
    mkdir -p otherSpp/pyrAby1 otherSpp/pyrHor1 otherSpp/therKoda1
    mv pyrFur2-pyrAby1.maf otherSpp/pyrAby1/chr.maf
    mv pyrFur2-pyrHor1.maf otherSpp/pyrHor1/chr.maf
    mv pyrFur2-therKoda1.maf otherSpp/therKoda1/chr.maf
    ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrAby1 /gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
    ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrHor1 /gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
    ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/therKoda1 /gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
    # The 4-way maf becomes multiz/chr.maf, linked into /gbdb under the
    # track name multizPf2Pa1Ph1Tk1.
    mkdir multiz
    mv pyrFur2-pyrAby1-pyrHor1-therKoda1.maf multiz/chr.maf
    ln -s /cluster/data/pyrFur2/bed/conservation/multiz /gbdb/pyrFur2/multizPf2Pa1Ph1Tk1
    #load
    # The pairwise loads pass an explicit -pathPrefix; the multiz load does not.
    hgLoadWiggle pyrFur2 phastCons /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wig
    hgLoadMaf -warn pyrFur2 multizPf2Pa1Ph1Tk1
    hgLoadMaf -warn pyrFur2 pyrAby1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
    hgLoadMaf -warn pyrFur2 pyrHor1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
    hgLoadMaf -warn pyrFur2 therKoda1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
    hgLoadBed pyrFur2 phastConsElements pyrFur2-elements.bed 
    #trackDb
    cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
    #trackDb.ra entry
    # track multizPf2Pa1Ph1Tk1
    # shortLabel Conservation
    # longLabel Pyrococcus 4-way multiz alignments
    # group compGeno
    # priority 10.0
    # visibility pack
    # type wigMaf 0.0 1.0
    # maxHeightPixels 100:40:11
    # wiggle phastCons
    # yLineOnOff Off
    # autoScale Off
    # pairwise pwMaf
    # speciesOrder pyrAby1 pyrHor1 therKoda1
    cvs commit -m "New multiz track" trackDb.ra
    #html page for multizPf2Pa1Ph1Tk1
    cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
    mv multizPyro.html multizPf2Pa1Ph1Tk1.html
    cvs remove multizPyro.html
    cvs add multizPf2Pa1Ph1Tk1.html
    cvs commit -m "Details page for multiz track" multizPf2Pa1Ph1Tk1.html
#KEGG
    # NOTE(review): every other track in this doc is built under
    # /cluster/data/pyrFur2/bed; a bed/ directory inside the trackDb source
    # tree looks unintended -- confirm the working directory.
    cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2/bed
    mkdir kegg
    cd kegg
    # KGpath.sh is called with the database name twice and the string 051019.
    ~/kent/src/hg/protein/KGpath.sh pyrFur2  pyrFur2  051019

### RNA LP FOLD (Daryl Thomas and Matthias Hoechsmann; September 7, 2006)

    # NOTE: this section uses csh/tcsh syntax ("set name = value", ">!" to
    # force-overwrite), unlike the sh mode declared at the top of the file.
    set workDir = /cluster/data/pyrFur2/bed/rnaLpFold
    mkdir -p $workDir
    cd $workDir

    set file = pyrFur2_dna.bed
    # Copy the input while replacing the first "chr1" on each line with "chr".
    sed 's/chr1/chr/' ~mhoechsm/transfer/${file} >! ${file}
#    cp ~mhoechsm/transfer/${file} ${file}
    # Print the largest value of (column 3 - column 2) found in the file.
    awk '{if ($3-$2>max) max=$3-$2} END {print max}' ${file}

    set db = pyrFur2
    # Drop any existing rnaLpFold table, recreate it from rnaLpFold.sql and
    # load the bed file into it.
    hgsql ${db} -e "drop table rnaLpFold"
    hgsql ${db} < ${HOME}/kent/src/hg/lib/rnaLpFold.sql
    hgLoadBed -noSort -oldTable -strict ${db} rnaLpFold ${file}
    # bed.tab is presumably a leftover written by hgLoadBed -- confirm.
    rm -f bed.tab

    ###

