#########################################################################
# Cavia Porcellus -- Guinea Pig
# Broad cavPor2 2X release

#########################################################################
# Download sequence (2006-03-03 kate)

    # The assembly lives on store12; /cluster/data/cavPor2 is a symlink to it.
    ssh kkstore05
    mkdir -p /cluster/store12/cavPor2
    ln -s /cluster/store12/cavPor2 /cluster/data/cavPor2

    # Fetch the Broad release into a fresh downloads directory.
    mkdir /cluster/data/cavPor2/downloads
    cd /cluster/data/cavPor2/downloads
    wget --recursive ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor2

    # wget mirrors the full host/path hierarchy; move the files up to the
    # current directory and remove the then-unneeded mirror tree.
    mv ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor2/* .
    rm -rf ftp.broad.mit.edu

#########################################################################
# Create .ra file and run makeGenomeDb.pl
    ssh hgwdev
    cd /cluster/data/cavPor2

    # Write the makeGenomeDb.pl config file.  The closing '_EOF_' line keeps
    # its quotes on purpose: csh compares the here-document terminator with
    # the word exactly as typed after <<.
    # dbDbSpeciesDir is set to guineaPig to match this organism.
    # NOTE(review): confirm that is the trackDb species directory name.
    cat << '_EOF_' >cavPor2.config.ra
# Config parameters for makeGenomeDb.pl:
db cavPor2
clade mammal
genomeCladePriority 35
scientificName Cavia porcellus
commonName GuineaPig
assemblyDate Oct. 2005
assemblyLabel Broad Institute cavPor2 (Draft_v2)
orderKey 99
#mitoAcc AJ222767
mitoAcc 5679797
fastaFiles /cluster/data/cavPor2/downloads/assembly.bases.gz
agpFiles /cluster/data/cavPor2/downloads/assembly.agp
qualFiles /cluster/data/cavPor2/downloads/scaffold.lifted.qac
dbDbSpeciesDir guineaPig
'_EOF_'

    # Run in the background with -stop=seq.  Use >& so that stderr is
    # captured in the log along with stdout (same redirection style as the
    # doWindowMasker.pl run in the WINDOWMASKER section).
    makeGenomeDb.pl -verbose=2 -stop=seq cavPor2.config.ra >& makeGenomeDb.out &

    # Need dbDb entry for name lookup.
    # The row is inserted by hand into hgcentraltest as inactive (active = 0,
    # orderKey = 99) with empty defaultPos and htmlPath.
    hgsql -e 'INSERT INTO dbDb (name, description, nibPath, organism, \
        defaultPos, active, orderKey, genome, scientificName, \
        htmlPath, hgNearOk, hgPbOk, sourceName) \
        VALUES("cavPor2", "Oct. 2005", "/gbdb/cavPor2", "GuineaPig", \
        "", 0, 99, "GuineaPig", \
        "Cavia porcellus", "", 0, 0, \
        "Broad Institute cavPor2 (Draft_v2)")' -h localhost hgcentraltest

################################################
## WINDOWMASKER (2006-03-04 kate)
# NOTE(review): the header above says 2006, but the run directory linked
# below is dated 2007-03-04 -- one of the two looks like a typo; confirm.
    ssh kkstore05
    cd /cluster/data/cavPor2/bed/
    # Launch the masking run in the background; stdout and stderr both go
    # to wmRun.log (csh >& redirection).
    nice /cluster/bin/scripts/doWindowMasker.pl cavPor2 \
        -workhorse=kolossus >& wmRun.log &
            # 10 hours -- requested Andy alter script to only
            # perform sdust run -- this should halve the time

    # Give the dated run directory a stable name (WMRun) and file the log
    # inside it.
    ln -s WindowMasker.2007-03-04 WMRun
    mv wmRun.log WMRun
    cd WMRun

    # Convert the lower-case n's left by WM to upper-case N (request to Andy
    # to fix this) and write the result as /cluster/data/cavPor2/cavPor2.2bit.
    # NOTE(review): tr runs over the whole FASTA stream, header lines
    # included; the names seen in the stats below (scaffold_<number>) contain
    # no 'n', so presumably no names are altered -- confirm for all sequences.
    twoBitToFa cavPor2.wmsk.sdust.2bit stdout | tr n N | \
                faToTwoBit stdin /cluster/data/cavPor2/cavPor2.2bit

    # stats on masking (recorded faSize output follows the command)
    cd /cluster/data/cavPor2
    twoBitToFa cavPor2.2bit stdout | nice faSize stdin
# 3403705911 bases (1454046733 N's 1949659178 real 1277828007 upper 671831171 lower) in 295515 sequences in 1 files
# Total size: mean 11517.9 sd 25705.3 min 201 (scaffold_113479) max 1027677 (scaffold_289404) median 4132
# N count: mean 4920.4 sd 14535.3
# U count: mean 4324.1 sd 9878.2
# L count: mean 2273.4 sd 4659.3

    # 34.4 % masked: 671831171 lower / 1949659178 real, i.e. the fraction of
    # non-N bases that are lower-case.
    # Mouse and rat are ~43%, from RM.  TRF gives them an extra 2%, skip here

    # Copy the masked 2bit and chrom.sizes to /san/sanvol1/scratch/cavPor2.
    # NOTE(review): chrom.sizes is not created in this section; presumably it
    # was produced by the makeGenomeDb.pl run -- confirm.
    mkdir -p /san/sanvol1/scratch/cavPor2
    cp -p cavPor2.2bit chrom.sizes /san/sanvol1/scratch/cavPor2

################################################
# DOWNLOADS (2007-06-05 kate)

    # Build the downloadable files: masked sequence as FASTA plus the AGP,
    # both gzipped, with checksums for the compressed files.
    ssh kkstore05
    mkdir /cluster/data/cavPor2/bigZips
    cd /cluster/data/cavPor2/bigZips
    cp ../downloads/assembly.agp cavPor2.agp
    nice twoBitToFa ../cavPor2.2bit cavPor2.fa
    nice gzip cavPor2.fa
    nice gzip cavPor2.agp
    md5sum *.gz > md5sum.txt

    # Expose the files under the web server's goldenPath tree via symlinks.
    ssh hgwdev
    set bd = /cluster/data/cavPor2
    set d = /usr/local/apache/htdocs/goldenPath
    mkdir -p $d/cavPor2/bigZips
    # Start from the sorAra1 README as a template.
    cp $d/sorAra1/bigZips/README.txt $bd/bigZips
    # EDIT the copied README.txt by hand for cavPor2 before linking it.
    ln -s $bd/bigZips/{*.gz,md5sum.txt,README.txt} $d/cavPor2/bigZips
   
