This file describes how we made the browser database on the
Dec 12, 2000 freeze.


CREATING DATABASE AND STORING mRNA/EST SEQUENCE AND AUXILIARY INFO

o - ln -s /projects/hg3/gs.8/oo.33 ~/oo
o - Create the database.
     - ssh cc94
     - Enter mysql via:
           mysql -u hgcat -pbigsecret
     - At mysql prompt type:
	create database hg8;
	quit
     - make a semi-permanent read-only alias:
        alias hg8 mysql -u hguser -phguserstuff -A hg8
o - Make sure there is at least 5 gig free on cc94:/usr/local/mysql 
o - Store the mRNA (non-alignment) info in database.
     hgLoadRna new hg8
     hgLoadRna add hg8 /projects/hg2/mrna.126/mrna.fa /projects/hg2/mrna.126/mrna.ra
     hgLoadRna add hg8 /projects/hg2/mrna.126/est.fa /projects/hg2/mrna.126/est.ra
    The last line will take quite some time to complete.  It will count up to
    about 3,800,000 before it is done.

REPEAT MASKING 


o - Load RepeatMasker output into database:
      cd /projects/hg3/gs.8/oo.33
      hgLoadOut hg8 ?/*.fa.out ??/*.fa.out
        (Ignore the "Strange perc. field" warnings.  Maybe mention them
	 to Arian someday.)

This will keep things going until the sensitive RepeatMasker run comes in
from Victor or elsewhere.  When it does:

o - Cd ~/oo/bed/rmskVictor
o - Unzip and process Victor's stuff with doMerge.sh and victor_merge_rm_out1.pl.
o - Remove fragments as so:
       cd out_tbl_oo33
       rm */ctg*/ctg*.fa.*.out

o - Copy in Victor's repeat masking runs over the contig runs in ~/oo/*/ctg*
       rm ~/oo.33/contigOut.zip
       zip ~/oo.33/contigOut.zip */ctg*/ctg*.fa.out
       cd ~/oo.33
       unzip contigOut.zip

o - Lift these up as so:
      - ssh kks00
      - cd ~/gs/oo.33
      - source jkStuff/liftOut2.sh

o - Load RepeatMasker output into database again:
      - ssh cc94
      - cd /projects/hg3/gs.8/oo.33
      - hgLoadOut hg8 ?/*.fa.out ??/*.fa.out
        (Ignore the "Strange perc. field" warnings.  Maybe mention them
	 to Arian someday.)


STORING O+O SEQUENCE AND ASSEMBLY INFORMATION

o - Create packed chromosome sequence files and put in database
     mysql -A -u hgcat -pbigsecret hg8  < ~/src/hg/lib/chromInfo.sql
     cd /projects/hg3/gs.8/oo.33
     hgNibSeq hg8 /projects/hg3/gs.8/oo.33/nib ?/chr*.fa ??/chr*.fa


o - Store o+o info in database.
     cd /projects/hg3/gs.8/oo.33
     jkStuff/liftGl.sh ooGreedy.102.gl
     hgGoldGapGl hg8 /projects/hg3/gs.8 oo.33 
     cd /projects/hg3/gs.8
     hgClonePos hg8 oo.33 ffa/sequence.inf /projects/hg3/gs.8
   (Ignore warnings about missing clones - these are in chromosomes 21 and 22)
     hgCtgPos hg8 oo.33

o - Make and load GC percent table
     - login to cc94
     cd /projects/hg3/gs.8/oo.33/bed
     mkdir gcPercent
     cd gcPercent
     mysql -A -u hgcat -pbigsecret hg8  < ~/src/hg/lib/gcPercent.sql
     hgGcPercent hg8 ../../nib



MAKING AND STORING mRNA AND EST ALIGNMENTS

o - Load up the local disks of the cluster with refSeq.fa, mrna.fa and est.fa
    from /projects/hg2/mrna.126  into /var/tmp/hg/h/mrna

o - Use BLAT to generate refSeq, mRNA and EST alignments as so:
      ssh cc01
      cd ~/oo/jkStuff
      ls -1S /projects/hg3/gs.8/oo.33/?/ctg*/*.fa /projects/hg3/gs.8/oo.33/??/ctg*/*.fa > ctg.lst
      source setupPostBlat.sh
    check with checkPostBlat.sh.  Try uncommenting some of it next time....
      
o - Process refSeq mRNA and EST alignments into near best in genome.
      ssh kks00
      mkdir /projects/hg3/gs.8/oo.33/psl
      cd /projects/hg3/gs.8/oo.33/psl
      source ../jkStuff/postSort.sh
    (If the est alignments exceed 2 gig use ../jkStuff/postSortBig.sh instead)

o - Load mRNA alignments into database.
      - ssh cc94
      cd /projects/hg3/gs.8/oo.33/psl/mrna
      foreach i (*.psl)
          mv $i $i:r_mrna.psl
      end
      hgLoadPsl hg8 *.psl
      cd ..
      hgLoadPsl hg8 all_mrna.psl -nobin

o - Load EST alignments into database.
      ssh cc94
      cd /projects/hg3/gs.8/oo.33/psl/est
      foreach i (*.psl)
            mv $i $i:r_est.psl
      end
      hgLoadPsl hg8 *.psl
      cd ..
      hgLoadPsl hg8 all_est.psl -nobin

o - Create subset of ESTs with introns and load into database.
      - ssh kks00
      cd ~/oo
      source jkStuff/makeIntronEst.sh
      - ssh cc94
      cd ~/oo/psl/intronEst
      hgLoadPsl hg8 *.psl

PRODUCING KNOWN GENES (RefSeq)

o - Go to the Entrez browser at http://www.ncbi.nlm.nih.gov/Entrez/
    and select Nucleotide in the search box.  Go to Limits and select
    'exclude all of the above', Molecule: mRNA, Only from: RefSeq,
    and then put "Homo sapiens" [organism] in the search box and hit 
    go.  Tweak things so you can save it to /projects/hg2/refseq.126/refseq.gb
    in GenBank flat file format.
o - Download ftp://ncbi.nlm.nih.gov/refseq/LocusLink/loc2ref and
    mim2loc to /projects/hg2/refseq.126
o - Similarly download refSeq proteins in fasta format to refSeq.pep
o - Align these by processes described under mRNA/EST alignments above.
    cd ~/oo/psl
    pslCat -check -nohead refSeq/*.psl | grep NM_ >/projects/hg2/refseq.126/known.oo33.psl
o - Produce refGene, refPep, refMrna, and refLink tables as so:
     ssh cc94
     cd /projects/hg2/refseq.126
     hgRefSeqMrna hg8 refSeq.fa refSeq.ra known.oo33.psl loc2ref refSeqPep.fa mim2loc
o - Add Jackson labs info
     cd ~/oo/bed
     mkdir jaxOrtholog
     cd jaxOrtholog
     ftp ftp://ftp.informatics.jax.org/pub/informatics/reports/HMD_Human3.rpt
     awk -f filter.awk *.rpt > jaxOrtholog.tab
    Load this into mysql with something like:
     mysql -u hgcat -pBIGSECRET hg8 < ~/src/hg/lib/jaxOrtholog.sql
     mysql -u hgcat -pBIGSECRET -A hg8
    and at the mysql> prompt
     load data local infile 'jaxOrtholog.tab' into table jaxOrtholog;

    


SIMPLE REPEAT TRACK

o - Execute the following (which will take about 
    8 hours).
	mkdir ~/oo/bed/simpleRepeat
	cd ~/oo/nib
	foreach i (*.nib)
	    echo processing $i
	    trfBig $i ~/oo/bed/simpleRepeat/$i -bed
	end
	cd ~/oo/bed/simpleRepeat
	rm *.nib
	cat c*.bed > all.bed
    Then log onto cc94 and
        cd ~/oo/bed/simpleRepeat 
	hg8 < ~/src/hg/lib/simpleRepeat.sql
    At the mysql> prompt type
        load data local infile 'all.bed' into table simpleRepeat;
	quit

PRODUCING GENSCAN PREDICTIONS
    
o - Produce contig genscan.gtf genscan.pep and genscanExtra.bed files like so:
     ssh roar
     cd ~/oo.33
     source jkStuff/gsBig.sh &
    Wait about 3 1/2 days for these to finish.
     started 11:00 am Oct 5.
     finished by 9:00 pm Oct 8.

o - Convert these to chromosome level files as so:
     cd ~/oo.33
     mkdir bed/genscan
     liftUp bed/genscan/genscan.gtf jkStuff/liftAll.lft warn ?/ctg*/genscan.gtf ??/ctg*/genscan.gtf
     liftUp bed/genscan/genscanSubopt.bed jkStuff/liftAll.lft warn ?/ctg*/genscanSub.bed ??/ctg*/genscanSub.bed
     cat ?/ctg*/genscan.pep ??/ctg*/genscan.pep > bed/genscan/genscan.pep

o - Load into the database as so:
     ssh cc94
     cd ~/oo.33/bed/genscan
     ldHgGene hg8 genscan genscan.gtf
     hgPepPred hg8 generic genscanPep genscan.pep
     hgLoadBed hg8 genscanSubopt genscanSubopt.bed


PRODUCING MOUSE ALIGNMENTS

o - Download the lower-case-masked assembly and put it in
    kkstore:/cluster/store1/a2ms.
   
o - Make sure that contig/ctg*.fa.masked files exist already.
    (May need to go to ~/oo and unzip contigFaMasked.zip.)
o - Mask simple repeats in addition to normal repeats with:
        mkdir ~/oo/jkStuff/trfCon
	cd ~/oo/allctgs
	/bin/ls -1 | grep -v '\.' > ~/oo/jkStuff/trfCon/ctg.lst
        cd ~/oo/jkStuff/trfCon
	mkdir err log out
    edit ~/oo/jkStuff/trf.gsub to update gs and oo version
	gensub ctg.lst ~/oo/jkStuff/trf.gsub
	mv gensub.out trf.con
	condor_submit trf.con
    wait for this to finish.  Check as so
        cd ~/oo
	source jkStuff/checkTrf.sh
    there should be no output.
o - Copy the RepeatMasked and trf masked genome to
    kkstore:/scratch/hg/gs.8/oo.33/contigTrf, and ask
    Jorge and all to binrsync to propagate the data
    across the new cluster.
o - Download the assembled mouse genome in lower-case
    masked form to /cluster/store1/arachne.3/whole.  
    Execute the script splitAndCopy.csh to chop it
    into roughly 50M pieces in arachne.3/parts
o - Set up the jabba job to do the alignment as so:
       ssh kkstore
       cd /cluster/store1/gs.8/oo.33
       mkdir blatMouse.phusion
       cd blatMouse.phusion
       ls -1S /scratch/hg/gs.8/oo.33/contigTrf/* > human.lst
       ls -1 /cluster/store1/arachne.3/parts/* > mouse.lst
    Make a file 'gsub' with the following three lines in it
#LOOP
/cluster/home/kent/bin/i386/blat -q=dnax -t=dnax {check in line+ $(path2)} {check in line+ $(path1)}  {check out line+ psl/$(root2)_$(root1).psl} -minScore=20 -minIdentity=20 -tileSize=4 -minMatch=2 -oneOff=0 -ooc={check in exists /scratch/hg/h/4.pooc} -qMask=lower -mask=lower
#ENDLOOP
    Process this into a jabba file and launch the first set
    of jobs (10,000 out of 70,000) as so:
        gensub2 mouse.lst human.lst gsub spec
	jabba make hut spec
	jabba push hut
    Do a 'jabba check hut' after about 20 minutes and make sure
    everything is right.  After that make a little script that
    does a "jabba push hut" followed by a "sleep 30" about 50
    times.  Interrupt script when you see jabba push say it's
    not pushing anything.

o - Sort alignments as so 
       ssh kkstore
       cd /cluster/store1/gs.8/oo.33/blatMouse
       pslCat -dir -check psl | liftUp -type=.psl stdout ../liftAll.lft warn stdin | pslSortAcc nohead chrom /cluster/store2/temp stdin
o - Get rid of big pile-ups due to contamination as so:
       cd chrom
       foreach i (*.psl)
           echo $i
           mv $i xxx
           pslUnpile -maxPile=600 xxx $i
       rm xxx
       end
o - Remove long redundant bits from read names by making a file
    called subs.in with the following line:
        gnl|ti^ti
        contig_^tig_
    and running the commands
        cd ~/mouse/vsOo33/blatMouse.phusion/chrom
	subs -e -c ^ *.psl > /dev/null
o - Copy over to network where database is:
        ssh kks00
	cd ~/oo/bed
	mkdir blatMouse
	mkdir blatMouse/ph.chrom600
	cd !$
        cp /cluster/store1/gs.8/oo.33/blatMouse.phusion/chrom/*.psl .
o - Rename to correspond with tables as so and load into database:
       ssh cc94
       cd ~/oo/bed/blatMouse/ph.chrom600
       foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_blatMouse.psl
       end
       hgLoadPsl hg8 *.psl
o - load sequence into database as so:
	ssh kks00
	faSplit about /projects/hg3/mouse/arachne.3/whole/Unplaced.mfa 1200000000 /projects/hg3/mouse/arachne.3/whole/unplaced
	ssh cc94
	hgLoadRna addSeq '-abbr=gnl|' hg8 /projects/hg3/mouse/arachne.3/whole/unpla*.fa
	hgLoadRna addSeq '-abbr=con' hg8 /projects/hg3/mouse/arachne.3/whole/SET*.mfa
    This will take quite some time.  Perhaps an hour.

o - Produce 'best in genome' filtered version:
        ssh kks00
	cd ~/mouse/vsOo33
	pslSort dirs blatMouseAll.psl temp blatMouse
	pslReps blatMouseAll.psl bestMouseAll.psl /dev/null -singleHit -minCover=0.3 -minIdentity=0.1
	pslSortAcc nohead bestMouse temp bestMouseAll.psl
	cd bestMouse
        foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_bestMouse.psl
        end
o - Load best in genome into database as so:
	ssh cc94
	cd ~/mouse/vsOo33/bestMouse
        hgLoadPsl hg8 *.psl

PRODUCING CROSS_SPECIES mRNA ALIGNMENTS

Here you align vertebrate mRNAs against the masked genome on the
cluster you set up during the previous step.

o - Make sure that gbpri, gbmam, gbrod, gbvrt, and gbinv are downloaded from Genbank into
    /projects/hg2/genbank.126

o - Process these out of genbank flat files as so:
       ssh kkstore
       cd /cluster/store1/genbank.126
       mkdir ../mrna.126
       gunzip -c gbpri*.gz gbmam*.gz gbrod*.gz gbvrt*.gz gbinv*.gz | gbToFaRa ~/hg/h/xenoRna.fil ../mrna.126/xenoRna.fa ../mrna.126/xenoRna.ra ../mrna.126/xenoRna.ta stdin
       cd ../mrna.126
       faSplit sequence xenoRna.fa 2 xenoRna
       ssh kks00
       cd /projects/hg2
       mkdir mrna.126
       cp /cluster/store1/mrna.126/xenoRna.* mrna.126

o - Set up cluster run.  First make sure genome is in kks00:/scratch/hg/gs.8/oo.33/contigTrf
    in RepeatMasked + trf form.  (This is probably done already in mouse alignment
    stage).  Then do:
       ssh kkstore
       cd /cluster/store1/gs.8/oo.33
       mkdir xenoRna
       cd xenoRna
       mkdir err log psl
       ls -1S /scratch/hg/gs.8/oo.33/contigTrf > human.lst
       ls -1S /cluster/store1/mrna.126/xenoRna?*.fa > other.lst
       gensub2 human.lst other.lst all.gsub all.cod
       source all.cod

o - Sort alignments as so:
       ssh kkstore
       cd /cluster/store1/gs.8/oo.33/xenoRna
       pslSort dirs raw.psl /cluster/store2/temp psl
       pslReps raw.psl cooked.psl /dev/null -minAli=0.25
       liftUp chrom.psl ../liftAll.lft warn cooked.psl
       pslSortAcc nohead chrom /cluster/store2/temp chrom.psl
       pslCat -dir chrom > xenoMrna.psl
       rm -r chrom raw.psl cooked.psl chrom.psl

o - Load into database as so:
       ssh cc94
       cd ~/oo/jkStuff/post.xenoRna
       hgLoadPsl hg8 xenoMrna.psl -tNameIx
       cd /projects/hg2/mrna.126
       hgLoadRna add hg8 /projects/hg2/mrna.126/xenoRna.fa xenoRna.ra


PRODUCING FISH ALIGNMENTS

o - Download sequence from ... and put it in 
	ssh kks00
       /projects/hg3/fish/seq15jun2001/bqcnstn_0106151510.fa
    then
	ln -s /projects/hg3/fish ~/fish
    split this into multiple files and compress original with
	cd ~/fish/seq15jun2001
        faSplit sequence bq* 100 fish
	compress bq*
	copy over to kkstore:/cluster/store2/fish/seq15jun2001
o - Do fish/human alignments.
       ssh kkstore
       cd /cluster/store2/fish
       mkdir vsOo33
       cd vsOo33
       mkdir psl
       ls -1S /var/tmp/hg/gs.8/oo.33/tanMaskNib/*.fa.trf > human.lst
       ls -1S /projects/hg3/fish/seq15jun2001/*.fa > fish.lst
     Copy over gsub from previous version and edit paths to
     point to current assembly.
       gensub2 fish.lst human.lst gsub spec
       jabba make hut spec
       jabba try hut
     Make sure jobs are going ok.  Then
       jabba push hut
     wait about 2 hours and do another
       jabba push hut
     do a jabba check hut, and if necessary another push hut.
o - Sort alignments as so 
       pslCat -dir psl | liftUp -type=.psl stdout ~/oo/jkStuff/liftAll.lft warn stdin | pslSortAcc nohead chrom temp stdin
o - Copy to cc94:/scratch.  Rename to correspond with tables as so and 
    load into database:
       ssh cc94
       cd ~/fish/vsOo33/chrom
       foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_blatFish.psl
       end
       hgLoadPsl hg8 *.psl

SGP GENE PREDICTIONS
     mkdir ~/oo/bed/sgp
     cd ~/oo/bed/sgp
     mkdir download
     cd download
   Now download *.gtf and *.prot from 
   http://www1.imim.es/genepredictions/H.sapiens/golden_path_20010806/SGP/
   Get rid of the extra .N in the transcripts with subs.  
     cd ..
     cp ~/oo/bed/geneid/subs .
     subs -e download/*.gtf > /dev/null
     ldHgGene hg8 sgpGene download/*.gtf -exon=CDS
     hgPepPred hg8 generic sgpPep download/*.prot


TIGR GENE INDEX

  o - Download ftp://ftp.tigr.org/private/HGI_ren/*aug.tgz into
      ~/oo.29/bed/tgi and then execute the following commands:
          cd ~/oo.29/bed/tgi
	  mv cattleTCs_aug.tgz cowTCs_aug.tgz
	  foreach i (mouse cow human pig rat)
	      mkdir $i
	      cd $i
	      gtar -zxf ../${i}*.tgz
	      gawk -v animal=$i -f ../filter.awk * > ../$i.gff
	      cd ..
	  end
	  mv human.gff human.bak
	  sed s/THCs/TCs/ human.bak > human.gff
	  ldHgGene -exon=TCs hg7 tigrGeneIndex *.gff


      
LOADING IN EXTERNAL FILES

o - Load STSs (done)
     - login to cc94
      cd ~/oo/bed
      hg8 < ~/src/hg/lib/stsMap.sql
      mkdir stsMap
      cd stsMap
      bedSort /projects/cc/hg/mapplots/data/tracks/oo.33/stsMap.bed stsMap.bed
      - Enter database with "hg8" command.
      - At mysql> prompt type in:
          load data local infile 'stsMap.bed' into table stsMap;
      - At mysql> prompt type
          quit


o - Load chromosome bands: 
      - login to cc94
      cd /projects/hg3/gs.8/oo.33/bed
      mkdir cytoBands
      cp /projects/cc/hg/mapplots/data/tracks/oo.33/cytobands.bed cytoBands
      hg8 < ~/src/hg/lib/cytoBand.sql
      Enter database with "hg8" command.
      - At mysql> prompt type in:
          load data local infile 'cytobands.bed' into table cytoBand;
      - At mysql> prompt type
          quit

o  - Load mouseRef track.  
    First copy in data from kkstore to ~/oo/bed/mouseRef.  
    Then substitute 'genome' for the appropriate chromosome 
    in each of the alignment files.  Finally do:
       hgRefAlign webb hg8 mouseRef *.alignments

o - Load Avid Mouse tracks
      ssh cc98
      cd ~/oo/bed
      mkdir avidMouse
      cd avidMouse
      wget http://pipeline.lbl.gov/tableCS-LBNL.txt
      hgAvidShortBed *.txt avidRepeat.bed avidUnique.bed
      hgLoadBed hg8 avidRepeat avidRepeat.bed
      hgLoadBed hg8 avidUnique avidUnique.bed

o - Load in Exonerate alignments 
      ssh cc94
      cd ~/oo/bed
      mkdir exonerate
      cd exonerate
      wget ftp.ebi.ac.uk/pub/software/ensembl/mouse_vs_ens110.tar.gz
      tar -zxvf mouse*.gz
      cat mouse_gff/*.gff | hgExonerate -elia hg8 exoMouse stdin
       
o - Load in Fiberglass alignments 
      ssh cc94
      cd ~/oo/bed
      mkdir fiberglass
      cd fiberglass
      wget http://waldo.wi.mit.edu/~danb/Chr22/all_hits
      hgFiberglass hg8 all_hits

o - Load SNPs into database.
      - ssh cc94
      - cd ~/oo/bed
      - mkdir snp
      - cd snp
      - Download SNPs from ftp://ftp.ncbi.nlm.nih.gov/pub/sherry/gp.oo33.gz
      - Unpack.
        grep RANDOM gp.oo33 > snpTsc.txt
        grep MIXED  gp.oo33 >> snpTsc.txt
        grep BAC_OVERLAP  gp.oo33 > snpNih.txt
        grep OTHER  gp.oo33 >> snpNih.txt
        awk -f filter.awk snpTsc.txt > snpTsc.contig.bed
        awk -f filter.awk snpNih.txt > snpNih.contig.bed
        liftUp snpTsc.bed ../../jkStuff/liftAll.lft warn snpTsc.contig.bed
        liftUp snpNih.bed ../../jkStuff/liftAll.lft warn snpNih.contig.bed
	hgLoadBed hg8 snpTsc snpTsc.bed
	hgLoadBed hg8 snpNih snpNih.bed

o - Load cpgIslands (done)
     - login to cc94
     - cd /projects/hg3/gs.8/oo.33/bed
     - mkdir cpgIsland
     - cd cpgIsland
     - hg8 < ~kent/src/hg/lib/cpgIsland.sql
     - wget http://genome.wustl.edu:8021/pub/gsc1/achinwal/MapAccessions/cpg_aug.oo33.tar
     - tar -xf cpg*.tar
     - awk -f filter.awk */ctg*/*.cpg > contig.bed
     - liftUp cpgIsland.bed ../../jkStuff/liftAll.lft warn contig.bed
     - Enter database with "hg8" command.
     - At mysql> prompt type in:
          load data local infile 'cpgIsland.bed' into table cpgIsland;

o - Load Ensembl genes:
     cd ~/oo/bed
     mkdir ensembl
     cd ensembl
     wget http://www.sanger.ac.uk/~birney/all_april_ctg.gtf.gz
     gunzip all*.gz
     liftUp ensembl110.gtf ~/oo/jkStuff/liftAll.lft warn all*.gtf
     ldHgGene hg8 ensGene en*.gtf
o - Load Ensembl peptides:
     (poke around ensembl to get their peptide files as ensembl.pep)
     (substitute ENST for ENSP in ensembl.pep with subs)
     wget ftp://ftp.ensembl.org/pub/current/data/fasta/pep/ensembl.pep.gz
     gunzip ensembl.pep.gz
   edit subs.in to read: ENSP|ENST
     subs -e ensembl.pep
     hgPepPred hg8 generic ensPep ensembl.pep

o - Load Sanger22 genes:
      - cd ~/oo/bed
      - mkdir sanger22
      - cd sanger22
      - not sure where these files were downloaded from
      - grep -v Pseudogene Chr22*.genes.gff | hgSanger22 hg8 stdin Chr22*.cds.gff *.genes.dna *.cds.pep 0
          | ldHgGene hg8 sanger22pseudo stdin
         - Note: this creates sanger22extras, but doesn't currently create
           a correct sanger22 table, which is replaced in the next steps
      - sanger22-gff-doctor Chr22.3.1x.cds.gff Chr22.3.1x.genes.gff \
          | ldHgGene hg8 sanger22 stdin
      - sanger22-gff-doctor -pseudogenes Chr22.3.1x.cds.gff Chr22.3.1x.genes.gff \
          | ldHgGene hg8 sanger22pseudo stdin

      - hgPepPred hg8 generic sanger22pep *.pep

o - Load Sanger20 genes:
     First download files from James Gilbert's email to ~/oo/bed/sanger20 and
     go to that directory while logged onto cc94.  Then:
        grep -v Pseudogene chr_20*.gtf | ldHgGene hg8 sanger20 stdin
	hgSanger20 hg8 *.gtf *.info


o - Load rnaGene table
      - login to cc94
      - cd ~kent/src/hg/lib
      - hg8 < rnaGene.sql
      - cd /projects/hg3/gs.8/oo.33/bed
      - mkdir rnaGene
      - cd rnaGene
      - download data from ftp.genetics.wustl.edu/pub/eddy/pickup/ncrna-oo27.gff.gz
      - gunzip *.gz
      - liftUp chrom.gff ../../jkStuff/liftAll.lft carry ncrna-oo27.gff
      - hgRnaGenes hg8 chrom.gff

o - Load exoFish table
     - login to cc94
     - cd /projects/hg3/gs.8/oo.33/bed
     - mkdir exoFish
     - cd exoFish
     - hg8 < ~kent/src/hg/lib/exoFish.sql
     - Put email attachment from Olivier Jaillon (ojaaillon@genoscope.cns.fr)
       into /projects/hg3/gs.8/oo.33/bed/exoFish/all_maping_ecore
     - awk -f filter.awk all_maping_ecore > exoFish.bed
     - hgLoadBed hg8 exoFish exoFish.bed

o - Mouse synteny track
     - login to cc94.
     - cd ~/kent/src/hg/lib
     - hg8 < mouseSyn.sql
     - mkdir ~/oo/bed/mouseSyn
     - cd ~/oo/bed/mouseSyn
     - Put Dianna Church's (church@ncbi.nlm.nih.gov) email attachment as
       mouseSyn.txt
     - awk -f format.awk *.txt > mouseSyn.bed
     - delete first line of mouseSyn.bed
     - Enter database with "hg8" command.
     - At mysql> prompt type in:
          load data local infile 'mouseSyn.bed' into table mouseSyn;


o - Load Genie predicted genes and associated peptides.
     - cat */ctg*/ctg*.affymetrix.gtf > predContigs.gtf
     - liftUp predChrom.gtf ../../jkStuff/liftAll.lft warn predContigs.gtf
     - ldHgGene hg8 genieAlt predChrom.gtf

     - cat */ctg*/ctg*.affymetrix.aa > pred.aa
     - hgPepPred hg8 genie pred.aa 

     - hg8
         mysql> delete from genieAlt where name like 'RS.%';
         mysql> delete from genieAlt where name like 'C.%';

o - Load Softberry genes and associated peptides (done)
     - cd ~/oo/bed
     - mkdir softberry
     - cd softberry
     - get ftp://www.softberry.com/pub/SC_AUG01/*
     ldHgGene hg8 softberryGene chr*.gff
     hgPepPred hg8 softberry *.pro
     hgSoftberryHom hg8 *.pro

o - Load Acembly (done)
    - Get acembly.gff from Jean and Michelle Thierry-Miegs and
      place in ~/oo/bed/acembly
    - Replace c_chr with chr in acembly.gff
    - Replace G_t1_chr with chr and likewise
      G_t2_chr with chr, etc.
    - Load into database as so:
        ldHgGene hg8 acembly acembly.gff
        hgPepPred hg8 generic acemblyPep acembly.pep

o - Load genomic dupes
    ssh cc94
    cd ~/oo/bed
    mkdir genomicDups
    cd genomicDups
    wget http://codon/jab/web/takeoff/oo33_dups_for_kent.zip
    unzip *.zip
    awk -f filter.awk oo33_dups_for_kent > genomicDups.bed
    mysql -u hgcat -pbigSECRET hg8 < ~/src/hg/lib/genomicDups.sql
    hgLoadBed hg8 -oldTable genomicDups genomicDups.bed

o - Load Ingo Ebersberger's chimp BLAT alignments
cd ~/oo
mkdir bed/chimpBlat
cd bed/chimpBlat

#!/bin/sh
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y
do
  wget http://email.eva.mpg.de/~ebersber/custom_track_chimp/MPI-sg_aug01/chr${i}_gp_F06Aug01.psl
done

Remove the first line from each psl file. It is junk.
pslCat *.psl > chimpBlat.psl
hgLoadPsl hg8 chimpBlat.psl


FAKING DATA FROM PREVIOUS VERSION
(This is just for until proper track arrives.  Rescues about
97% of data.  Just an experiment, not really followed through on.)

o - Rescuing STS track:
     - log onto cc94
     - mkdir ~/oo/rescue
     - cd !$
     - mkdir sts
     - cd sts
     - bedDown hg3 mapGenethon sts.fa sts.tab
     - echo ~/oo/sts.fa > fa.lst
     - pslOoJobs ~/oo ~/oo/rescue/sts/fa.lst ~/oo/rescue/sts g2g
     - log onto cc01
     - cd ~/oo/rescue/sts
     - split all.con into 3 parts and condor_submit each part
     - wait for assembly to finish
     - cd psl
     - mkdir all
     - ln ?/*.psl ??/*.psl *.psl all
     - pslSort dirs raw.psl temp all
     - pslReps raw.psl contig.psl /dev/null
     - rm raw.psl
     - liftUp chrom.psl ../../../jkStuff/liftAll.lft carry contig.psl
     - rm contig.psl
     - mv chrom.psl ../convert.psl


