
##########################################################################
# CREATE HIV1 DATABASE (STARTED 8/1/06, Fan) 

# for emacs: -*- mode: sh; -*-

# DOWNLOAD SEQUENCE (DONE, Fan, 08/07/06)
# Re-done to use the reference sequence, K03455. 

    ssh hgwdev
    mkdir /cluster/store12/hiv1
    cd /cluster/store12/hiv1

    # Get HIV1 HXB2 reference sequence, K03455.fa, from NCBI 
    # >gi|1906382|gb|K03455.1|HIVHXB2CG Human immunodeficiency virus type 1 (HXB2), complete genome; HIV1/H
TLV-III/LAV reference genome

    # translate to nib
    ln -s /cluster/store12/hiv1 ~/hiv1
    cd ~/hiv1
    mkdir nib
    ln -s K03455.fa chr1.fa
    faToNib chr1.fa nib/chr1.nib

CREATING DATABASE (DONE 08/01/06)

    # Create the hiv1 database.
    # Galt already created it.

    # ssh hgwdev
    # echo 'create database hiv1' | hgsql ''

    # make a semi-permanent read-only alias:
    alias hiv1 "mysql -u hguser -phguserstuff -A hiv1"
    # Use df to ake sure there is at least 5 gig free on 
    # hgwdev:/var/lib/mysql

# CREATING GRP TABLE FOR TRACK GROUPING (DONE 08/2/06)
    ssh hgwdev
    echo "create table grp (PRIMARY KEY(NAME)) select * from hg17.grp" \
      | hgsql hiv1

    # remove ENCODE groups
    echo "delete from grp where name like 'encode%'" | hgsql hiv1

STORING O+O SEQUENCE AND ASSEMBLY INFORMATION  (DONE 08/02/06)

    # Make symbolic links from /gbdb/hiv1/nib to the real nibs.
    ssh hgwdev
    mkdir -p /gbdb/hiv1/nib
    foreach f (/cluster/store12/hiv1/nib/*.nib)
      ln -s $f /gbdb/hiv1/nib
    end
    # Load /gbdb/hiv1/nib paths into database and save size info.
    hgsql hiv1  < ~/src/hg/lib/chromInfo.sql
    cd ~/hiv1
    hgNibSeq -preMadeNib hiv1 /gbdb/hiv1/nib chr1.fa
    echo "select chrom,size from chromInfo" | hgsql -N hiv1 > chrom.sizes
    
    # Set up relational mrna tables.
    hgLoadRna drop hiv1
    hgLoadRna new hiv1
    # This created quite a few tables, not sure they are really needed.

MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR HIV1 (DONE 08/02/06)
    echo 'insert into defaultDb values("HIV (HXB2)", "hiv1");' \
      | hgsql -h genome-testdb hgcentraltest

    echo 'insert into dbDb values("hiv1", "Aug. 2006", \
          "/gbdb/hiv1/nib", "Human immunodeficiency virus 1", "chr1", 1, 99.5, "HIV (HXB2)","Human immunodeficiency virus 1", "/gbdb/hiv1/html/description.html", 0, 0, "GenBank sequence as of Aug. 1st, 2006");' \
      | hgsql -h genome-testdb hgcentraltest

    echo 'insert into genomeClade values("HIV (HXB2)", "other", 100);'\
      | hgsql -h genome-testdb hgcentraltest

    # Make trackDb table so browser knows what tracks to expect:
    ssh hgwdev
    cd ~/src/hg/makeDb/trackDb
    cvs up -d -P

    # Edit that makefile to add hiv1 in all the right places

    vi makefile

    make update
    make alpha
    cvs commit makefile

# MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR HIV1 
    ssh hgwdev
    echo 'insert into blatServers values("hiv1", "blat11", "17784", "1", "0"); \
          insert into blatServers values("hiv1", "blat11", "17785", "0", "0");' \
      | hgsql -h genome-testdb hgcentraltest


    create table blatServers select * from hgcentraltest.blatServers;
    delete from blatServers where db!='hiv1';

    create table clade select * from hgcentraltest.clade;
    delete from clade where name != 'other';

    create table dbDb select * from hgcentraltest.dbDb;
    delete from dbDb where name != 'hiv1';
    
    create table defaultDb select * from hgcentraltest.defaultDb;
    delete from  defaultDb where name != 'hiv1';

    create table genomeClade select * from hgcentraltest.genomeClade;
    delete from  genomeClade where genome != 'HIV (HXB2)';

    CREATE TABLE sessionDb (
  	id int(10) unsigned NOT NULL auto_increment,
  	contents longblob NOT NULL,
  	reserved tinyint(4) NOT NULL default '0',
  	firstUse datetime NOT NULL default '0000-00-00 00:00:00',
  	lastUse datetime NOT NULL default '0000-00-00 00:00:00',
  	useCount int(11) NOT NULL default '0',
  	PRIMARY KEY  (id)
) TYPE=MyISAM;

    CREATE TABLE userDb (
  id int(10) unsigned NOT NULL auto_increment,
  contents longblob NOT NULL,
  reserved tinyint(4) NOT NULL default '0',
  firstUse datetime NOT NULL default '0000-00-00 00:00:00',
  lastUse datetime NOT NULL default '0000-00-00 00:00:00',
  useCount int(11) NOT NULL default '0',
  PRIMARY KEY  (id)
  ) TYPE=MyISAM;

# Create trackDb_aidsvax table

    hgsql hiv1

    CREATE TABLE trackDb_aidsvax (
    tableName varchar(255) not null,    # Symbolic ID of Track
    shortLabel varchar(255) not null,   # Short label displayed on left
    type varchar(255) not null, # Track type: bed, psl, genePred, etc.
    longLabel varchar(255) not null,    # Long label displayed in middle
    visibility tinyint unsigned not null,       # 0=hide, 1=dense, 2=full, 3=pack
    priority float not null,    # 0-100 - where to position.  0 is top
    colorR tinyint unsigned not null,   # Color red component 0-255
    colorG tinyint unsigned not null,   # Color green component 0-255
    colorB tinyint unsigned not null,   # Color blue component 0-255
    altColorR tinyint unsigned not null,        # Light color red component 0-255
    altColorG tinyint unsigned not null,        # Light color green component 0-255
    altColorB tinyint unsigned not null,        # Light color blue component 0-255
    useScore tinyint unsigned not null, # 1 if use score, 0 if not
    private tinyint unsigned not null,  # 1 if only want to show it on test site
    restrictCount int not null, # Number of chromosomes this is on (0=all though!)
    restrictList longblob not null,     # List of chromosomes this is on
    url longblob not null,      # URL to link to when they click on an item
    html longblob not null,     # Some html to display when they click on an item
    grp varchar(255) not null,  # Which group track belongs to
    canPack tinyint unsigned not null,  # 1 if can pack track display, 0 otherwise
    settings longblob not null, # Name/value pairs for track-specific stuff
              #Indices
    PRIMARY KEY(tableName)
);

load data local infile "trackDb_fanhsu.tab" into table trackDb_aidsvax;

CREATE TABLE hgFindSpec_aidsvax (
  searchName varchar(255) NOT NULL default '',
  searchTable varchar(255) NOT NULL default '',
  searchMethod varchar(255) NOT NULL default '',
  searchType varchar(255) NOT NULL default '',
  shortCircuit tinyint(3) unsigned NOT NULL default '0',
  termRegex varchar(255) NOT NULL default '',
  query varchar(255) NOT NULL default '',
  xrefTable varchar(255) NOT NULL default '',
  xrefQuery varchar(255) NOT NULL default '',
  searchPriority float NOT NULL default '0',
  searchDescription varchar(255) NOT NULL default '',
  searchSettings longblob NOT NULL,
  PRIMARY KEY  (searchName)
) TYPE=MyISAM;

CREATE TABLE gsSubjInfo (
    subjId varchar(255) not null,       # Subject ID
    gender varchar(255) not null,       # Gender
    age varchar(255) not null,  # Age
    race varchar(255) not null, # Race
    immunStatus varchar(255) not null,  # Immunization Status
    labCode varchar(255) not null,      # MB Lab Code
    specimenNo varchar(255) not null,   # Specimen Number
    injections int not null,    # Total number of injections
    riskFactor varchar(255) not null,   # Risk factor
    geography varchar(255) not null,    # Geographic information
    dateInfect varchar(255) not null,   # Date of infection
    daysInfect varchar(255) not null,   # Days of infection
    daysInfectN varchar(255) not null,  # Days of infection
              #Indices
    PRIMARY KEY(subjId)
);

CREATE TABLE gsClinicRec (
    specimenId varchar(255) not null,           # Specimen Number
    subjId varchar(255) not null,               # Subject ID
    labCode varchar(255) not null,              # MB Lab Code
    dateCollection varchar(255) not null,       # Date of collection
    daysCollection varchar(255) not null,       # Days of collection
    rnaQPcr varchar(255),                       # RNA quant PCR
    cd2Count    int(10),                        # CD4 counts
    PRIMARY KEY(specimenId)
);

# CREATE INTERPRO TRACK (DONE, Fan, 7/30/07).

# get fasta file of protein sequences for each InterPro HIV1 domain.

    getInterProFa interProXrefHiv1 interProHiv1.fa

# BLAT the sequence agains the base genome

    gfClient localhost 17778 /gbdb/hiv1/nib -out=psl -t=dnax -q=prot interProHiv1.fa interProHiv1.psl

# Load the resulting psl.
# copy the .psl file to my home directory and do hgLoadPsl there 
# to avoid access permission problem

    pushd .
    cp interProHiv1.psl ~
    cd ~
    hgLoadPsl hiv1 table=testPsl interProHiv1.psl
    rm interProHiv1.psl
    hgsql hiv1 -e 'drop table testPsl'
    popd 

# Create the bed file

    hgsql hiv1 -N -e 'select "chr1", tStart, tEnd,  qName from testPsl where (tEnd-tStart)/3/qSize>0.42' >interProHiv1.bed

# load the bed file into the interPro table.

    hgLoadBed hiv1 interPro interProHiv1.bed

# CREATE hivGene TRACK (DONE. 7/31/07, Fan)

# copy the sequence data from HIV Sequence Database web page:
# http://www.hiv.lanl.gov/content/hiv-db/REVIEWS/HXB2.html and 
# create raw sequence file, landMark.txt

# Process landMark.txt into hivGene.fa

    faToTab -type=protein landMark.txt landMark.tab
    cut -f 1 landMark.tab |toLower stdin j1
    cut -f 2 landMark.tab >j2
    paste j1 j2 >hivGene.tab
    rm j1 j2

# create hivGene.fa
    tabToFa hivGene

# BLAT against base genome
    gfClient localhost 17778 /gbdb/hiv1/nib -out=psl -t=dnax -q=prot  hivGene.fa hivGene.psl

# load into temp table, testPsl
    pushd .
    cp hivGene.psl ~
    cd ~
    hgLoadPsl hiv1 table=testPsl hivGene.psl
    rm hivGene.psl
    popd 

# create initial bed file
    hgsql hiv1 -N -e \
    'select "chr1", tStart, tEnd, qName from testPsl where (tEnd-tStart)/3/qSize>0.70'\
     >hivGene.bed0

# remove tat and rev, upper case ENV, POL, and GAG
    cat hivGene.bed0 |grep -v tat |grep -v rev|sed -e 's/env/ENV/' |sed -e 's/pol/POL/'|\
    sed -e 's/gag/GAG/' >hivGene.bed1

# manually create old.bed from data gathered from:

#   http://www.hiv.lanl.gov/content/hiv-db/MAP/landmark.html
#   http://www.hiv.lanl.gov/content/hiv-db/MAP/old_map/landmark.html
#   http://www.hiv.lanl.gov/content/hiv-db/MAP/old_map/Env.html
#
#   The content of old.bed is as following:
	chr1    0       634     5'LTR
	chr1    5830    5999    tat_exon1
	chr1    5969    6045    rev_exon1
	chr1    6614    6691    V1
	chr1    6695    6811    V2
	chr1    7109    7216    V3
	chr1    7376    7477    V4
	chr1    7601    7636    V5
	chr1    8378    8653    rev_exon2
	chr1    8378    8469    tat_exon2
	chr1    7726    8078    RRE
	chr1    9085    9719    3'LTR

# concatenate hivGene.bed1 and old.bed into hivGene.bed

    cat hivGene.bed1 old.bed >hivGene.bed
    rm *.bed1 *.bed0

# load it into hivGene table

    hgLoadBed hiv1 hivGene hivGene.bed
    hgsql hiv1 -e 'drop table testPsl'

# CREATE VAX004 TRACK (DONE, Fan, 8/2/07).

    ssh hiv1
    mkdir -p /cluster/store12/medical/hiv/hiv1/vax004
    cd /cluster/store12/medical/hiv/hiv1/vax004

    hgsql hiv1 -N -e 'select * from dnaSeq where id like "%U%"' >vax004.tab
    tabToFa vax004

    mkdir -p /gbdb/hiv1/vax004
    cp -p vax004.fa /gbdb/hiv1/vax004/vax004.fa

    hgLoadSeq -replace hiv1 /gbdb/hiv1/vax004/vax004.fa

    gfClient -minScore=200 -minIdentity=80 -nohead hiv1.cse.ucsc.edu 17779 \
    /gbdb/hiv1/nib -out=psl -t=dna -q=dna vax004.fa vax004.psl

    wc *.psl
    cut -f 10 vax004.psl |wc
    cut -f 10 vax004.psl |sort -u |wc

# go to home directory to do hgLoadPsl
    cp vax004.psl ~
    cd ~
    hgLoadPsl hiv1 vax004.psl
    rm vax004.psl

# CREATE HIVGENE TRACK

# copy and paste to create landMark.txt from the "Numbering Positions in HIV 
# Relative to HXB2CG" page at HIV Sequence Database 

    faToTab -type=protein landMark.txt landMark.tab
    cut -f 1 landMark.tab |toLower stdin j1
    cut -f 2 landMark.tab >j2
    paste j1 j2 >hivGene.tab
    rm j1 j2
    tabToFa hivGene

# BLAT against base genome
    gfClient localhost 17778 /gbdb/hiv1/nib \
    -out=psl -t=dnax -q=prot hivGene.fa hivGene.psl

# load psl result to temp table, testPsl
    pushd .
    cp hivGene.psl ~
    cd ~
    hgLoadPsl hiv1 table=testPsl hivGene.psl
    rm hivGene.psl
    popd 

# create .bed file
    hgsql hiv1 -N -e \
    'select "chr1", tStart, tEnd,  qName from testPsl where  (tEnd-tStart)/3/qSize>0.70' \
    > hivGene.bed0
    cat hivGene.bed0 |grep -v tat |grep -v rev|sed -e 's/env/ENV/' \
    |sed -e 's/pol/POL/'|\
    sed -e 's/gag/GAG/' >hivGene.bed1
    cat hivGene.bed1 old.bed >hivGene.bed
    rm *.bed1 *.bed0
# load into track table hivGene
    hgLoadBed hiv1 hivGene hivGene.bed
    hgsql hiv1 -e 'drop table testPsl'

# CREATE INTERPRO TRACK

# get all HIV-1 domain sequences
    getInterProFa interProXrefHiv1 interProHiv1.fa

# BLAT against base genome
    gfClient localhost 17778 /gbdb/hiv1/nib -out=psl -t=dnax -q=prot \
    interProHiv1.fa interProHiv1.psl

# load it into a temp table
    hgLoadPsl hiv1 table=testPsl interProHiv1.psl

# create bed file from this temp table
    hgsql hiv1 -N -e \
    'select "chr1", tStart, tEnd, qName from testPsl where (tEnd-tStart)/3/qSize>0.42' \
    > interProHiv1.bed

# load the bed file into the track table
    hgLoadBed hiv1 interPro interProHiv1.bed

# drop the temp table.
    hgsql hiv1 -e 'drop table testPsl'

# CREATE CONSERVATION TRACKS

    mkdir -p /cluster/store12/medical/hiv/hiv1/conservation
    cd /cluster/store12/medical/hiv/hiv1/conservation

# create the .wig file and .fa file of the consensus sequence.
    gsidMsa hiv1 vax004Msa HXB2 6348 vax004Cons.wig vax004Consensus.fa

# encode and load the wig file
    wigEncode vax004Cons.wig stdout vax004Cons.wib \
    | hgLoadWiggle hiv1 vax004Cons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hiv1/wib
    cp vax004Cons.wib /gbdb/hiv1/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file
    gsidAaMsa2 hiv1 vax004Msa HXB2 6348 vax004AaCons.wig vax004AaConsensus.fa

# encode and load the .wib file   
    wigEncode vax004AaCons.wig stdout vax004AaCons.wib \
    | hgLoadWiggle hiv1 vax004AaCons stdin

    cp vax004AaCons.wib /gbdb/hiv1/wib

# CREATE MAF TRACKS

    mkdir -p /cluster/store12/medical/hiv/hiv1/msa
    cd /cluster/store12/medical/hiv/hiv1/msa

# create a script file, doall

    hgsql hiv1 -N -e \
    'select id from dnaSeq where id like "%U%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hiv1 -N -e  "select id, seq from vax004Msa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX004
    doall >hiv1.tab
# convert .tab into .fa file
    tabToFa hiv1

# grab the base alignment sequence
    echo ">hiv1" >hiv1.aln
    hgsql hiv1 -N -e 'select seq from vax004Msa where id="HXB2"'  >> hiv1.aln

# prepare an interium file, jjAll.mfa
    cat hiv1.aln hiv1.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafHiv1 to create a precursor file for the final .maf

    xmfaToMafHiv1 jjAll.mfa j.out  org1=hiv1
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf

# copy .maf to /gbdb.
    mkdir -p  /gbdb/hiv1/vax004Maf 
    cp chr1.maf /gbdb/hiv1/vax004Maf -p
    echo before load
    hgLoadMaf hiv1 vax004Maf

# create another copy for protein MAF.
    mkdir -p  /gbdb/hiv1/vax004AaMaf 
    cp -p chr1.maf /gbdb/hiv1/vax004AaMaf
    hgLoadMaf hiv1 vax004AaMaf

##########################################################################
# UPDATE CLINICAL DB TABLES ACCORDING TO CORRECTED DATA FILES FROM GSID
# (STARTED 3/8/08, DONE 3/12/08, Fan)

# Save .xls files sent by Elizabeth to .txt files and store them under
# /cluster/store12/medical/vaxGen/clinicalData/20080308/vax004/new/

#    GSID_DEMOG_SEQNO20080304.txt     
#    VAXGENRND_CD4_SEQNOf_20080308.txt
#    VAXGENRND_CD4_SEQNO20080308.txt

    mkdir -p /cluster/store12/medical/hiv/hiv1/clinical
    cd /cluster/store12/medical/hiv/hiv1/clinical

    cp -p /cluster/store12/medical/vaxGen/clinicalData/20080308/vax004/new/*.txt .

# store raw data into xxxxRaw tables
    processRaw GSID_DEMOG_SEQNO20080304.txt 
    processRaw VAXGENRND_CD4_SEQNO20080308.txt
    processRaw VAXGENRND_CD4_SEQNOf_20080308.txt

    hgsql hiv1 < GSID_DEMOG_SEQNO20080304Raw.sql
    hgsql hiv1 < VAXGENRND_CD4_SEQNO20080308Raw.sql
    hgsql hiv1 < VAXGENRND_CD4_SEQNOf_20080308Raw.sql

    hgsql hiv1 -e \
    'load data local infile "GSID_DEMOG_SEQNO20080304.txt" into table GSID_DEMOG_SEQNO20080304Raw ignore 1 lines'

    hgsql hiv1 -e \
    'load data local infile "VAXGENRND_CD4_SEQNO20080308.txt" into table VAXGENRND_CD4_SEQNO20080308Raw ignore 1 lines'

    hgsql hiv1 -e \
    'load data local infile "VAXGENRND_CD4_SEQNOf_20080308.txt" into table VAXGENRND_CD4_SEQNOf_20080308Raw ignore 1 lines'

# building initial gsidClinicRecTemp table ...
# build initial gsidClinicRecTemp table, filter out duplicates for GSID4124

    hgsql hiv1 -N -e \
    'select "specId",subjId,labCode,daysOfCollection,"rna","cd4" from VAXGENRND_CD4_SEQNO20080308Raw where labCode !="U-210"' \
    >gsidClinicRecTemp.tab

    hgsql hiv1 -e 'delete from gsidClinicRecTemp'
    hgsql hiv1 -e \
    'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'

# building subjLabcode table ...
    hgsql hiv1 -N -e 'select subjId, labCode from GSID_DEMOG_SEQNO20080304Raw where labCode!=""'\
    | sort -u > subjLabcode.tab

    hgsql hiv1 -e "drop table subjLabcode"
    hgsql hiv1 < subjLabcode.sql
    hgsql hiv1 -e 'load data local infile "subjLabcode.tab" into table subjLabcode'
    hgsql hiv1 -e 'update subjLabcode set labCode="U-166" where labCode="U-166/U-210"'

# fill in labCode

    hgsql hiv1 -e \
    'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'

# fill in specimenId
    hgsql hiv1 -e \
    'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO20080304Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'

# fill in RNA
    hgsql hiv1 -e \
    'update gsidClinicRecTemp t, VAXGENRND_CD4_SEQNO20080308Raw r set t.hivQuan=r.TestResult where t.subjId=r.subjId and t.daysCollection=r.daysOfCollection and TestName="HIV-1 RNA QUANT PCR"' 

# fill in CD4
    hgsql hiv1 -e \
    'update gsidClinicRecTemp t, VAXGENRND_CD4_SEQNO20080308Raw r set t.cd4Count=r.TestResult where t.subjId=r.subjId and t.daysCollection=r.daysOfCollection and TestName="ABSOLUTE CD4"'

# change GSID4124 specimenId ...
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set specimenId="098-5724662" where specimenId like "098-5724662%"'

# change "< xxx" to 200
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="200" where hivQuan like "<%"'

# changing "> xxx" ... to 1000000
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="1000000" where hivQuan like ">%"'

# change "." to NULL
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="NULL" where hivQuan="."'

# update rna NULL ...
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="NULL" where hivQuan="rna"'

# update gsidClinicRecTemp set hivQuan="-1" where hivQuan="rna"'

# update rna "WE ..."
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="NULL" where hivQuan like "WE%"'

# update rna blank
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set hivQuan="NULL" where hivQuan=""'

# update cd4 "WE ..."
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count like "WE%"'

# update cd4 NULL ...
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="cd4"'

# update daysCollection NULL ...
    hgsql hiv1 -e \
    'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'


    hgsql hiv1 -N -e 'select * from gsidClinicRecTemp ' \
    |uniq |sed -e 's/NULL/-1/g'  > gsidClinicRecNew.tab

# load data into gsidClinicRecNew
    hgsql hiv1 -e 'drop table gsidClinicRecNew'
    hgsql hiv1 < gsidClinicRecNew.sql
    hgsql hiv1 -e 'delete from gsidClinicRecNew'
    hgsql hiv1 -e 'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'

# set NULLs
    hgsql hiv1 -e 'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
    hgsql hiv1 -e 'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'
    hgsql hiv1 -e 'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'
    hgsql hiv1 -e 'delete from gsidClinicRecNew where labCode="" and subjId=""'
    hgsql hiv1 -e 'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'

# check difference between the new table and the old table
    hgsql hiv1 -N -e 'select * from gsidClinicRec where subjId not like"GSID3%"' \
    > gsidClinicRecVax004.tab
    hgsql hiv1 -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew.tab
    diff gsidClinicRecVax004.tab gsidClinicRecNew.tab  >gsidClinicRec_diff.txt

# Inspect the diff result to see if there is anything unexpected.
# Once satisfied, rename the new tables.

    hgsql hiv1 -N -e 'rename table gsidClinicRecNew to gsidClinicRec' 

# create VAXGENRND_CD4_SEQNO20080308RawFixed to fix those "<xxx" and ">xxx" fields.
    hgsql hiv1 <VAXGENRND_CD4_SEQNO20080308RawFixed.sql

    hgsql hiv1 -e \
    'load data local infile "VAXGENRND_CD4_SEQNO20080308.txt" into table VAXGENRND_CD4_SEQNO20080308RawFixed ignore 1 lines'

    hgsql hiv1 -e \
    'update VAXGENRND_CD4_SEQNO20080308RawFixed set testResult='1000000' where testName="HIV-1 RNA QUANT PCR"  and TestResult like ">%"'

    hgsql hiv1 -e \
    'update VAXGENRND_CD4_SEQNO20080308RawFixed set testResult='200' where testName="HIV-1 RNA QUANT PCR"  and TestResult like "<%"'

# use the newly created ...Fixed table to construct the gsidClinicrecWithSeq table

    hgsql hiv1 -N -e \
    'select c.* from VAXGENRND_CD4_SEQNO20080308RawFixed t,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and t.subjId=c.subjId and testName="ABSOLUTE CD4" and c.labCode=t.labCode and cd4Count=testResult and t.DaysOfCollection=c.daysCollection' >j.1

    hgsql hiv1 -N -e \
    'select c.* from VAXGENRND_CD4_SEQNO20080308RawFixed t,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and t.subjId=c.subjId and testName="HIV-1 RNA QUANT PCR" and c.labCode=t.labCode and hivQuan=testResult and t.DaysOfCollection=c.daysCollection' >j.2

    cat j.1 j.2 |sort -u |sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp

    hgsql hiv1 -e 'drop table gsidClinicRecWithSeqNew'
    hgsql hiv1 < gsidClinicRecWithSeqNew.sql
    hgsql hiv1 -e \
    'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
    rm gsidClinicRecWithSeqNew.tmp

    hgsql hiv1 -e 'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
    hgsql hiv1 -e 'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'

    hgsql hiv1 -N -e 'select * from gsidClinicRecWithSeqOld' >gsidClinicRecWithSeqOld.tab
    hgsql hiv1 -N -e 'select * from gsidClinicRecWithSeqNew' >gsidClinicRecWithSeqNew.tab

# compare the new with the old.
    diff gsidClinicRecWithSeqOld.tab gsidClinicRecWithSeqNew.tab > gsidClinicRecWithSeq_diff.txt

# Inspect the diff result to see if there is anything unexpected.
# Once satisfied, rename the new tables.

    hgsql hiv1 -N -e 'rename table gsidClinicRecWithSeqNew to gsidClinicRecWithSeq' 

# Copy gsidCinicRec and gsidClinicRecWithSeq tables from hiv1 to hivgne8 and
# hivmn1.

##########################################################################
