##########################################################################
# CREATE HIVA244 DATABASE (STARTED 9/5/08 DONE 9/14/08, Fan) 

# for emacs: -*- mode: sh; -*-

    ssh hiv1

    mkdir /hive/groups/gsid/medical/hiv/hiva244
    cd /hive/groups/gsid/medical/hiv/hiva244

# Receive data file, reference.fasta, from Keith Crandall dated 8/29/08,
# keep the A244 part and name it hivA244.fa.

    # translate to nib
    mkdir nib
    cp Vp hivA244.fa chr1.fa
    faToNib chr1.fa nib/chr1.nib

CREATING DATABASE 

    # Create the hiva244 database.

    hgsql hivA2442 -e 'create database hiva244'

# CREATING GRP TABLE FOR TRACK GROUPING 
    
    echo "create table grp (PRIMARY KEY(NAME)) select * from hivA2442.grp" \
      | hgsql hiva244

# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION  

    mkdir -p /gbdb/hiva244/nib
    cp -p nib/chr1.nib /gbdb/hiva244/nib

    # Load /gbdb/hiva244/nib paths into database and save size info.
    hgsql hiva244  < ~/src/hg/lib/chromInfo.sql

    hgNibSeq -preMadeNib hiva244 /gbdb/hiva244/nib /gbdb/hiva244/nib/chr1.fa

    echo "select chrom,size from chromInfo" | hgsql -N hiva244 > chrom.sizes

# MAKE HGCENTRALHIV1 ENTRY AND TRACKDB TABLE FOR HIVA244 
    echo 'insert into defaultDb values("HIV A244 (GP120)", "hiva244");' \
      | hgsql -h localhost hgcentralhiv1

    echo 'insert into dbDb values("hiva244", "Sept. 2008", \
          "/gbdb/hiva244/nib", "HIV A244 (GP120)", "chr1", 1, 2050, \
    "HIV A244 (GP120)","Human immunodeficiency     virus 1", \
    "/gbdb/hiva244/html/description.html", 0, 0, " sequence as of Oct., 2008");' \
      | hgsql hgcentralhiv1 -h localhost

    echo 'insert into genomeClade values("HIV A244 (GP120)", "other", 200);'\
      | hgsql hgcentralhiv1 -h localhost

    # Edit that makefile to add hiva244 in all the right places

    cd ~src/hg/makeDb/trackDb
    vi makefile

    mkdir Vp hiv/hiva244
    cp Vp hiv/hivmn2/trackDb hiv/hiva244

    make update
    make alpha update DBS=hiva244

    cvs update
    cvs commit makefile

# MAKE hgcentralhiv1 blatServers ENTRY FOR hiva244

# Ask admin to start BLAT server processes

   echo 'insert into blatServers values("hiva244", "hiv1", "17796", "1", "0"); \
        insert into blatServers values("hiva244", "hiv1", "17797", "0", "0");' \
      | hgsql hgcentralhiv1 -h localhost

# CREATE TRACKDB TABLE

    cd src/hg/makeDb/trackDb/hiv
    mkdir hiva244
    cvs add hiva244
    cd hiva244
    cp ../hivmn2/trackDb.ra . Vp
    cvs add trackDb.ra
    cvs commit trackDb.ra
    cd ~/src/hg/makeDb/trackDb
    make update DBS=hiva244
    make alpha update DBS=hiva244

# copy over tables from hivVax003Vax004

    cd /hive/groups/gsid/medical/hiv/hiva244
    echo 'cp -p /data/mysql/hivVax003Vax004/$1.* /data/mysql/hiva244' >copy1Table
    chmod +x copy1Table

    copy1Table dnaSeq
    copy1Table aaSeq
    copy1Table gsidSubjInfo
    copy1Table gsidClinicRec
    copy1Table gsIdXref
    copy1Table vax003AEMsa
    copy1Table vax003BMsa
    copy1Table vax003BMsa
    copy1Table vax004Msa

# CREATE VAX004 TRACK 

    cd /hive/groups/gsid/medical/hiv/hiva244
    mkdir vax004
    cd vax004

# get vax004 sequences
    hgsql hiva244 -N -e 'select * from dnaSeq where id like "%U%"' >vax004.tab

# create .fa file
    tabToFa vax004

    mkdir -p /gbdb/hiva244/vax004
    cp -p vax004.fa /gbdb/hiva244/vax004/vax004.fa

    hgLoadSeq -replace hiva244 /gbdb/hiva244/vax004/vax004.fa

# BLAT
    gfClient -minScore=200 -minIdentity=80 -nohead hiv1.cse.ucsc.edu 17797  /gbdb/hiva244/nib \
    -out=psl -t=dna -q=dna vax004.fa vax004.psl

# count the result
    wc *.psl
    cut -f 10 vax004.psl |wc
    cut -f 10 vax004.psl |sort -u |wc

# load the psl result into vax004 table
    hgLoadPsl hiva244 vax004.psl

# hgLoadPsl has some file permission problem.  Finish this by manually load the psl.tab file.
    hgsql hivA244 -e 'load data local infile "psl.tab" into table vax004'

# CREATE VAX003 TRACK 

    cd /hive/groups/gsid/medical/hiv/hiva244
    mkdir vax003
    cd vax003

# get vax003 sequences
    hgsql hiva244 -N -e 'select * from dnaSeq where id like "%T%"' >vax003.tab

# create .fa file
    tabToFa vax003

    mkdir -p /gbdb/hiva244/vax003
    cp -p vax003.fa /gbdb/hiva244/vax003/vax003.fa

    hgLoadSeq -replace hiva244 /gbdb/hiva244/vax003/vax003.fa

# BLAT
    gfClient -minScore=200 -minIdentity=80 -nohead hiv1.cse.ucsc.edu 17797  /gbdb/hiva244/nib \
    -out=psl -t=dna -q=dna vax003.fa vax003.psl

# count the result
    wc *.psl
    cut -f 10 vax003.psl |wc
    cut -f 10 vax003.psl |sort -u |wc

# load the psl result into vax003 table
    hgLoadPsl hiva244 vax003.psl

# hgLoadPsl has some file permission problem.  Finish this by manually load the psl.tab file.
    hgsql hiva244 -e 'load data local infile "psl.tab" into table vax003'

#CREATE HIVGENE TRACK

    cd /hive/groups/gsid/medical/hiv/hiva244

    mkdir hivGene
    cd hivGene

# CREATE hivGene TRACK 

# during initial hiv1 build, landMark.txt was created by:   
# copy the sequence data from HIV Sequence Database web page:
# http://www.hiv.lanl.gov/content/sequence/HIV/REVIEWS/HXB2.html
# create raw sequence file, landMark.txt

# copy over this file

    cp Vp /hive/groups/gsid/medical/hiv/hiv1/hivGene/landMark.txt .

# Process landMark.txt into hivGene.fa

    faToTab -type=protein landMark.txt landMark.tab
    cut -f 1 landMark.tab |toLower stdin j1
    cut -f 2 landMark.tab >j2
    paste j1 j2 >hivGene.tab
    rm j1 j2

# create hivGene.fa
    tabToFa hivGene

# BLAT against base genome
    gfClient localhost 17796 /gbdb/hiva244/nib -out=psl -t=dnax -q=prot  hivGene.fa hivGene.psl

# load into temp table, testPsl
    hgLoadPsl hiva244 table=testPsl hivGene.psl
# finish load manually to overcome permission problem
     hgsql hiva244 Ve 'load data local infile "psl.tab" into table testPsl'
     
# create initial bed file
    hgsql hiva244 -N -e \
    'select "chr1", tStart, tEnd, qName from testPsl where (tEnd-tStart)/3/qSize>0.70'\
     >hivGene.bed0

# remove tat and rev, upper case ENV, POL, and GAG
    cat hivGene.bed0 |grep -v tat |grep -v rev|sed -e 's/env/ENV/' |sed -e 's/pol/POL/'|\
    sed -e 's/gag/GAG/' >hivGene.bed1

# The alignment for A244 base genome is just too poor for any of those V* regions
# This track really does not provide much significant info.
# manually add ENV entry
     
     cp hivGene.bed1 hivGene.bed
     
    vi hivGene.bed

# load it into hivGene table

    hgLoadBed hiva244 hivGene hivGene.bed
    hgsql hiva244 -e 'drop table testPsl'

# CREATE INTERPRO TRACK

   cd /hive/groups/gsid/medical/hiv/hiva244
   mkdir interPro
   cd interPro

# get all HIV-1 domain sequences
# use what we built for hiv1 before

   cp /hive/groups/gsid/medical/hiv/hiv1/interPro/interProHiv1.fa interProhiva244.fa

# BLAT against base genome
    gfClient localhost 17796 /gbdb/hiva244/nib -out=psl -t=dnax -q=prot  interProhiva244.fa interProhiva244.psl

# load it into a temp table
    hgLoadPsl hiva244 table=testPsl interProhiva244.psl
# finish above manually.
    hgsql hiva244 Ve load data local infile "psl.tab" into table  testPsl

# create bed file from this temp table
    hgsql hiva244 -N -e \
    'select "chr1", tStart, tEnd, qName from testPsl where (tEnd-tStart)/3/qSize>0.42' \
    > interProhiva244.bed

# load the bed file into the track table
    hgLoadBed hiva244 interPro interProhiva244.bed

# drop the temp table.
    hgsql hiva244 -e 'drop table testPsl'

# CREATE CONSERVATION TRACKS

    mkdir -p /hive/groups/gsid/medical/hiv/hiva244/conservation
    cd /hive/groups/gsid/medical/hiv/hiva244/conservation

# create the .wig file and .fa file of the consensus sequence.

    gsidMsa hiva244 vax003BMsa A244-gp120 1 vax003BCons.wig vax003BConsensus.fa
    gsidMsa hiva244 vax003AEMsa A244-gp120 1 vax003AECons.wig vax003AEConsensus.fa

# encode and load the wig file
    wigEncode vax003BCons.wig stdout vax003BCons.wib \
    | hgLoadWiggle hiva244 vax003BCons stdin

    wigEncode vax003AECons.wig stdout vax003AECons.wib \
    | hgLoadWiggle hiva244 vax003AECons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hiva244/wib
    cp vax003BCons.wib /gbdb/hiva244/wib

    cp vax003AECons.wib /gbdb/hiva244/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file

    gsidAaMsa2 hiva244 vax003BMsa A244-gp120 1 vax003BAaCons.wig vax003AaBConsensus.fa
    gsidAaMsa2 hiva244 vax003AEMsa A244-gp120 1 vax003AEAaCons.wig vax003AaAEConsensus.fa

# encode and load the .wib file   
    wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
    | hgLoadWiggle hiva244 vax003BAaCons stdin

    cp vax003BAaCons.wib /gbdb/hiva244/wib

    wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
    | hgLoadWiggle hiva244 vax003AEAaCons stdin

    cp vax003AEAaCons.wib /gbdb/hiva244/wib

# CREATE MAF TRACKS FOR vax003AE STRAIN

    cd /hive/groups/gsid/medical/hiv/hiva244
    mkdir -p msa/AE
    cd msa/AE

# create a script file, doall

    hgsql hiva244 -N -e \
    'select id from vax003AEMsa where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hiva244 -N -e  "select id, seq from vax003AEMsa where id='${1}'"
    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003AE
    doall >a244.tab
# convert .tab into .fa file
    tabToFa a244

# grab the base alignment sequence
    echo ">hiva244" >a244.aln
    hgsql hiva244 -N -e 'select seq from vax003AEMsa where id="A244-gp120"'  >> a244.aln

# prepare an interium file, jjAll.mfa
    cat a244.aln a244.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafA244AE to create a precursor file for the final .maf

    xmfaToMafA244AE jjAll.mfa j.out  org1=hiva244
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

#    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hiva244/vax003AEMaf
    cp chr1.maf /gbdb/hiva244/vax003AEMaf -p
    echo before load
    hgLoadMaf hiva244 vax003AEMaf

# create another copy for protein MAF.

    mkdir -p  /gbdb/hiva244/vax003AEAaMaf 
    cp -p chr1.maf /gbdb/hiva244/vax003AEAaMaf
    hgLoadMaf hiva244 vax003AEAaMaf


# CREATE MAF TRACKS FOR vax003B STRAIN

    cd /hive/groups/gsid/medical/hiv/hiva244
    mkdir -p msa/B
    cd msa/B

# create a script file, doall

    hgsql hiva244 -N -e \
    'select id from vax003BMsa where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hiva244 -N -e  "select id, seq from vax003BMsa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003B
    doall >a244.tab
# convert .tab into .fa file
    tabToFa a244

# grab the base alignment sequence
    echo ">hiva244" >a244.aln
    hgsql hiva244 -N -e 'select seq from vax003BMsa where id="A244-gp120"'  >> a244.aln

# prepare an interium file, jjAll.mfa
    cat a244.aln a244.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafA244B to create a precursor file for the final .maf

    xmfaToMafA244B jjAll.mfa j.out  org1=hiva244
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

#    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hiva244/vax003BMaf
    cp chr1.maf /gbdb/hiva244/vax003BMaf -p
    echo before load
    hgLoadMaf hiva244 vax003BMaf

# create another copy for protein MAF.

    mkdir -p  /gbdb/hiva244/vax003BAaMaf 
    cp -p chr1.maf /gbdb/hiva244/vax003BAaMaf
    hgLoadMaf hiva244 vax003BAaMaf

##########################################################################
# REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)

# See details in hivVax003Vax004.txt.
###########################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE B

    ssh hiv1
    mkdir -p /hive/groups/gsid/medical/hiv/posSelection/B/hiva244
    cd /hive/groups/gsid/medical/hiv/posSelection/B/hiva244

# BLAT /cluster/hive/groups/gsid/medical/hiv/posSelection/B/BMsaAaConsensus.fa
# against hiva244 base genome, select psl without header option
# cut and paste the result into the file BMsa.psl

hgLoadPsl -keep -table=BMsaPsl -nobin hiva244 BMsa.psl 

# will get the following error:

#Processing BMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT  INFILE
'/cluster/hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsa.psl'  INTO
TABLE BMsaPsl

#mySQL error 13: Can't get stat of
'/cluster/hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsa.psl'
(Errcode: 13)

# load manually then

hgsql hiva244
load data local infile "BMsa.psl" into table BMsaPsl;
quit

# build positive selection tracks for model 2 and model 8.

gsidPosSelect hiva244  BMsaPsl posSelBuild pSelectBModel2  posSelBModel2.bed
hgLoadBed hiva244 posSelBModel2 posSelBModel2.bed

gsidPosSelect hiva244 BMsaPsl posSelBuild pSelectBModel8  posSelBModel8.bed
hgLoadBed hiva244 posSelBModel8 posSelBModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE

    ssh hiv1
    mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hiva244
    cd /hive/groups/gsid/medical/hiv/posSelection/AE/hiva244

# BLAT
# /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
# against hiva244 base genome, select psl without header option
# cut and paste the result into the file AEMsa.psl

hgLoadPsl -keep -table=AEMsaPsl -nobin hiva244 AEMsa.psl 

# will get the following error:

#Processing AEMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT  INFILE
'/cluster/hive/groups/gsid/medical/hiv/hiva244/posSelection/AEMsa.psl'  INTO
TABLE AEMsaPsl

#mySQL error 13: Can't get stat of
'/cluster/hive/groups/gsid/medical/hiv/hiva244/posSelection/AEMsa.psl'
(Errcode: 13)

# load manually then

hgsql hiva244
load data local infile "AEMsa.psl" into table AEMsaPsl;
quit

# build positive selection tracks for model 2 and model 8.

gsidPosSelect hiva244  AEMsaPsl posSelBuild pSelectAEModel2
posSelAEModel2.bed
hgLoadBed hiva244 posSelAEModel2 posSelAEModel2.bed

gsidPosSelect hiva244 AEMsaPsl posSelBuild pSelectAEModel8  posSelAEModel8.bed
hgLoadBed hiva244 posSelAEModel8 posSelAEModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)

# Please see the corresponding section in hivVax003Vax004.txt for details.
##########################################################################

