# for emacs: -*- mode: sh; -*-


# This file describes the browser build for the Orangutan
# genome, July 2007
#
#	"$Id: ponAbe1.txt,v 1.4 2007/09/08 00:02:47 hiram Exp $"
#
######################################################################
## DOWNLOAD SEQUENCE (DONE - 2007-08-17 - Hiram)
    # Set up the ponAbe1 data directory (real storage on /cluster/store2,
    # symlinked from /cluster/data) and fetch the WUSTL 2.0.2 assembly files
    # into its wustl/ subdirectory.
    ssh kkstore01
    mkdir /cluster/store2/ponAbe1
    ln -s /cluster/store2/ponAbe1 /cluster/data/ponAbe1
    mkdir /cluster/data/ponAbe1/wustl
    cd /cluster/data/ponAbe1/wustl
    # No -O option: wget already saves each download under the URL basename
    # (which is ${F}), and --timestamping has no effect when -O is given.
    for F in supercontigs.agp.gz supercontigs.fa.gz contigs.fa.gz contigs.fa.qual.gz; do
	wget --timestamping \
	    "ftp://genome.wustl.edu/pub/organism/Primates/Pongo_pygmaeus_abelii/assembly/Pongo_pygmaeus_abelii-2.0.2/output/${F}"
    done

    # record of what was fetched (sizes and dates as seen at download time)
    ls -ogrt
# -rw-rw-r--  1 824149580 Jul 10 17:22 contigs.fa.gz
# -rw-rw-r--  1   9613417 Jul 10 17:23 supercontigs.agp.gz
# -rw-rw-r--  1 724013642 Jul 10 17:23 contigs.fa.qual.gz
# -rw-rw-r--  1 900117536 Jul 10 17:24 supercontigs.fa.gz

#######################################################################
## create config.ra and run makeGenomeDb.pl (DONE - 2007-08-21 - Hiram)
    # log in to kkstore01 and work from the ponAbe1 data directory
    # (the /cluster/data/ponAbe1 symlink created in the download step)
    ssh kkstore01
    cd /cluster/data/ponAbe1

    # Write the makeGenomeDb.pl configuration file.  The delimiter is quoted
    # on the "<<" line so the body is copied literally with no expansion.
    # The terminating line is the bare word _EOF_: sh/bash compare against
    # the delimiter after quote removal, so a quoted terminator line would
    # never match and the here-document would run on to end of input.
    cat << '_EOF_' > ponAbe1.config.ra
# Config parameters for makeGenomeDb.pl:
db ponAbe1
scientificName Pongo pygmaeus abelii
commonName Orangutan
assemblyDate Jul. 2007
assemblyLabel WUSTL 2.0.2
orderKey 31
# mitoAcc gi:1743294
mitoAcc X97707.1
fastaFiles /cluster/data/ponAbe1/wustl/supercontigs.fa.gz
agpFiles /cluster/data/ponAbe1/wustl/supercontigs.agp.gz
# qualFiles /dev/null
dbDbSpeciesDir orangutan
_EOF_
    # << happy emacs
    # First makeGenomeDb.pl run on ponAbe1.config.ra with -stop=agp, at
    # niceness 19 in the background; stdout and stderr go to makeGenomeDb.out.
    # NOTE(review): presumably this runs the pipeline up to and including
    # its "agp" step -- confirm against makeGenomeDb.pl usage.
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
	-stop=agp ponAbe1.config.ra > makeGenomeDb.out 2>&1 &
    #	real    24m24.468s
    # Second run with -continue=db, logging to db.continue.out.
    # NOTE(review): both runs are backgrounded with "&"; the first job
    # presumably has to finish before this one is started.
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
	-continue=db ponAbe1.config.ra > db.continue.out 2>&1 &
    ## says it needs a clade
    # add to the .ra file: the two settings are appended to
    # ponAbe1.config.ra explicitly, rather than left as bare lines that a
    # shell would try to execute as commands
    cat << '_EOF_' >> ponAbe1.config.ra
clade mammal
genomeCladePriority 11
_EOF_
    ## and continue
    # Re-run makeGenomeDb.pl with -continue=dbDb on the updated config, at
    # niceness 19 in the background; output is logged to dbDb.continue.out.
    # NOTE(review): presumably resumes at the "dbDb" step that stopped for
    # lack of a clade -- confirm against makeGenomeDb.pl usage.
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
	-continue=dbDb ponAbe1.config.ra > dbDb.continue.out 2>&1 &
    # add the trackDb files to the source tree and to the trackDb/makefile

##########################################################################
# fetch photograph (DONE - 2007-08-21 - Hiram)
    # Download the NHGRI press photo and produce the scaled-down JPEG used
    # for the browser.
    mkdir /cluster/data/ponAbe1/photo
    cd /cluster/data/ponAbe1/photo
    # -O stores the download under a descriptive local name.  --timestamping
    # is not used: wget ignores it (with a warning) whenever -O is given.
    wget http://www.genome.gov/Images/press_photos/highres/84-300.jpg \
	    -O nhgri.original.84-300.jpg
    # scale to fit 300x200 at JPEG quality 80
    convert -geometry 300x200 -quality 80 nhgri.original.84-300.jpg \
	Pongo_pygmaeus_abelii.jpg
    # check this .jpg image into the source tree browser/images/ directory
##########################################################################
## Repeat masker (DONE - 2007-08-22 - Hiram)
    ssh kkstore01
    ## use screen for this
    # one build directory, used both as the working directory and as the
    # -buildDir argument
    buildDir=/cluster/data/ponAbe1/bed/RepeatMasker
    mkdir "${buildDir}"
    cd "${buildDir}"
    # background run at niceness 19; stdout and stderr collected in do.out
    time nice -n +19 ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
	-bigClusterHub=pk -buildDir="${buildDir}" \
	ponAbe1 > do.out 2>&1 &
    #	real    1831m46.301s

##############################################################################
## simpleRepeat masking (DONE - 2007-09-05 - Hiram)
    ## create a kki kluster run
    # Stage the unmasked sequence under /iscratch/i/ponAbe1 on kkr1u00,
    # split it into pieces, and copy the directory to kkr2u00 .. kkr8u00.
    ssh kkr1u00
    mkdir /iscratch/i/ponAbe1
    cd /iscratch/i/ponAbe1
    cp -p /cluster/data/ponAbe1/ponAbe1.unmasked.2bit \
	/cluster/data/ponAbe1/chrom.sizes .
    twoBitToFa ponAbe1.unmasked.2bit ponAbe1.unmasked.fa
    mkdir split
    #  split sequence into about 1000 files, each about 3,000,000 bases
    faSplit about ponAbe1.unmasked.fa 3000000 split/s_

    for N in 2 3 4 5 6 7 8; do
	rsync -a --progress /iscratch/i/ponAbe1/ kkr${N}u00:/iscratch/i/ponAbe1/
    done

    # the cluster run itself is set up from kki, in the trf work directory
    ssh kki
    mkdir -p /cluster/data/ponAbe1/bed/simpleRepeat/trf
    cd /cluster/data/ponAbe1/bed/simpleRepeat/trf

    # Create runTrf, the csh wrapper run once per cluster job.  Given a file
    # name as $1, it strips the extension ($1:r), copies
    # /iscratch/i/ponAbe1/split/<root>.fa into /scratch/tmp/<root>, runs
    # trfBig there writing <root>.bed, copies that .bed back into the
    # directory the job started in, and removes the scratch directory.
    # The here-document terminator is the bare word _EOF_: sh/bash match the
    # delimiter after quote removal, so a quoted terminator never matches.
    cat << '_EOF_' > runTrf
#!/bin/csh -fe
#
set C = $1:r
set SRC = /iscratch/i/ponAbe1/split/$C.fa
mkdir -p /scratch/tmp/$C
cp -p $SRC /scratch/tmp/$C/$C.fa
pushd /scratch/tmp/$C
/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $C.fa \
	/dev/null -bedAt=$C.bed -tempDir=/scratch/tmp/$C
popd
rm -f $C.bed
cp -p /scratch/tmp/$C/$C.bed .
rm -fr /scratch/tmp/$C
_EOF_
    # << happy emacs
    chmod +x runTrf

    # Write the gensub2 template: a single ./runTrf job line between the
    # #LOOP / #ENDLOOP markers.  The quoted delimiter keeps $(path1) and
    # $(root1) literal instead of being run as command substitutions.
    # The terminator is the bare word _EOF_, which is what sh/bash actually
    # match after quote removal on the delimiter.
    cat << '_EOF_' > template
#LOOP
./runTrf $(path1) {check out line $(root1).bed}
#ENDLOOP
_EOF_
    # << happy emacs

    # one line per split file name; these become the $1 arguments of runTrf
    ls /iscratch/i/ponAbe1/split > part.list
    # build jobList from part.list and the template
    # NOTE(review): "single" presumably stands in for an unused second
    # list -- confirm against gensub2 usage
    gensub2 part.list single template jobList
    para create jobList
    # NOTE(review): the next line is operator shorthand for the usual
    # sequence of separate para invocations, not a literal command line
    para try ... check ... push ... etc ...
# Completed: 894 of 894 jobs
# CPU time in finished jobs:      45858s     764.31m    12.74h    0.53d  0.001 y
# IO & Wait Time:                 10652s     177.53m     2.96h    0.12d  0.000 y
# Average job time:                  63s       1.05m     0.02h    0.00d
# Longest finished job:             855s      14.25m     0.24h    0.01d
# Submission to last job:          3742s      62.37m     1.04h    0.04d

    # gather every per-piece result into one bed file in the parent
    # directory, then keep the rows whose 5th field is <= 12 as the mask set
    cat s_*.bed > ../simpleRepeat.bed
    cd ..
    awk '$5 <= 12' simpleRepeat.bed > trfMask.bed

    # Load the simpleRepeat table into the ponAbe1 database on hgwdev and
    # record its coverage.
    ssh hgwdev
    cd /cluster/data/ponAbe1/bed/simpleRepeat
    time nice -n +19 hgLoadBed ponAbe1 simpleRepeat \
      simpleRepeat.bed -sqlTable="$HOME/kent/src/hg/lib/simpleRepeat.sql"
    #	Loaded 1039128 elements of size 16

    # featureBits runs in the foreground so the result file is complete
    # before it is displayed (a backgrounded run would race with the cat)
    nice -n +19 featureBits ponAbe1 simpleRepeat \
	> fb.simpleRepeat.ponAbe1.txt 2>&1
    cat fb.simpleRepeat.ponAbe1.txt
    #	119951362 bases of 3093799891 (3.877%) in intersection

    #	add the trfMask to the rmsk masked sequence to get our final
    #	masked sequence
    ssh kkstore01
    cd /cluster/data/ponAbe1
    # The bed file is given to twoBitMask directly, so "nice" applies to
    # twoBitMask itself; piping through "nice cat" only reniced the cat.
    time nice -n +19 twoBitMask -add -type=.bed ponAbe1.rmsk.2bit \
	bed/simpleRepeat/trfMask.bed ponAbe1.2bit
    #	measure it
    time nice -n +19 twoBitToFa ponAbe1.2bit stdout \
	| faSize stdin > faSize.ponAbe1.2bit.txt 2>&1
    grep masked faSize.ponAbe1.2bit.txt
    #	%48.60 masked total, %50.91 masked real

    ## clean up the /iscratch/i/ponAbe1/ directory
    ssh kkr1u00
    # Delete only if the cd succeeded; an unguarded "rm -fr *" would
    # otherwise empty whatever directory the shell was left in.
    cd /iscratch/i/ponAbe1 && rm -fr ./*
    # push the now-empty directory to the other nodes; --delete removes
    # their copies of the files
    for R in 2 3 4 5 6 7 8; do
	rsync -a --progress --delete --stats /iscratch/i/ponAbe1/ "kkr${R}u00:/iscratch/i/ponAbe1/"
    done
    # remove the empty directory here, then on each of the other nodes
    cd /iscratch/i && rmdir ponAbe1
    for R in 2 3 4 5 6 7 8; do
	ssh "kkr${R}u00" rmdir /iscratch/i/ponAbe1
    done

############################################################################
#  BLATSERVERS ENTRY (DONE - 2007-09-06 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    # Insert two blatServers rows for ponAbe1 into hgcentraltest, both on
    # host blat13: port 17784 with isTrans=1, canPcr=0 and port 17785 with
    # isTrans=0, canPcr=1.
    # NOTE(review): presumably the translated and untranslated servers
    # respectively -- confirm.
    # The whole SQL text is one single-quoted shell word, so the
    # backslash-newline pairs inside it are passed to hgsql literally; only
    # the final backslash (after the closing quote) is a shell continuation.
    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ponAbe1", "blat13", "17784", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ponAbe1", "blat13", "17785", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

