# for emacs: -*- mode: sh; -*-

                                                                                               
# The following are browsers built for BME 230, Spring 2004 class:
#   Aquifex aeolicus VF5             (bacteria)
#   Buchnera sp. APS                 (bacteria)
#   Methanococcus maripaludis S2     (archaea)
#   Bordetella bronchiseptica RB50   (bacteria)
#   Deinococcus radiodurans R1       (bacteria)
#   Salmonella typhimurium LT2       (bacteria)
#   Methanopyrus kandleri AV19       (archaea)
#   Aeropyrum pernix                 (archaea)
#   Escherichia coli O157:H7         (bacteria)
#   Thermoplasma acidophilum         (archaea)
#   Sulfolobus tokodaii              (archaea)
#   Pyrococcus horikoshii            (archaea)
#   Geobacter sulfurreducens PCA     (bacteria)
#   Halobacterium sp. NRC-1          (archaea)

# For single chromosome genomes, I just got a script that will set up databases:

ssh hgwdev
cd ~/kent/src/hg/makeDb/trackDb

# Each stanza below runs bme230.sh for one genome and then creates that
# genome's trackDb directory and adds it to CVS.  The arguments handed to
# bme230.sh are, as used here: database name, scientific name, clade
# (Bacteria or Archaea), FTP URL of the main chromosome FASTA, and a short
# label (see bme230.sh itself for the exact meaning of each).
#
# Archaeal genomes go under archaea/ (not "archae/") so that they match the
# archaea/ directory that receives the trackDb.ra files later in this file.
# mkdir -p is used throughout so a missing parent directory or a re-run does
# not make the step fail.

~/kent/src/hg/makeDb/bme230.sh aquAeo1 "Aquifex aeolicus VF5" Bacteria ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Aquifex_aeolicus/NC_000918.fna "A. aeolicus VF5"
mkdir -p bacteria/aquAeo1
# First bacterial genome: the parent directory must be added to CVS as well.
cvs add bacteria bacteria/aquAeo1

~/kent/src/hg/makeDb/bme230.sh bucBuc1 "Buchnera sp. APS" Bacteria ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Buchnera_sp/NC_002528.fna "B. buchnera APS"
mkdir -p bacteria/bucBuc1
cvs add bacteria/bucBuc1

~/kent/src/hg/makeDb/bme230.sh metMar1 "Methanococcus maripaludis S2" Archaea ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Methanococcus_maripaludis/NC_005791.fna "M. maripaludis S2"
mkdir -p archaea/metMar1
# First archaeal genome: the parent directory must be added to CVS as well.
cvs add archaea archaea/metMar1

~/kent/src/hg/makeDb/bme230.sh borBro1 "Bordetella bronchiseptica RB50" Bacteria ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Bordetella_bronchiseptica/NC_002927.fna "B. bronchiseptica RB50"
mkdir -p bacteria/borBro1
cvs add bacteria/borBro1

# Some sort of error occurred with the next genome in the list (presumably
# Deinococcus radiodurans R1, which is run further down -- TODO confirm).
# It is retried later.

~/kent/src/hg/makeDb/bme230.sh salTyp1 "Salmonella typhimurium LT2" Bacteria ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Salmonella_typhimurium_LT2/NC_003197.fna "S. typhimurium LT2"
mkdir -p bacteria/salTyp1
cvs add bacteria/salTyp1

~/kent/src/hg/makeDb/bme230.sh metKan1 "Methanopyrus kandleri AV19" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Methanopyrus_kandleri/NC_003551.fna "M. kandleri AV19"
mkdir -p archaea/metKan1
cvs add archaea/metKan1

~/kent/src/hg/makeDb/bme230.sh aerPer1 "Aeropyrum pernix" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Aeropyrum_pernix/NC_000854.fna "A. pernix"
mkdir -p archaea/aerPer1
cvs add archaea/aerPer1

~/kent/src/hg/makeDb/bme230.sh escColH7 "Escherichia coli O157:H7" Bacteria ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Escherichia_coli_O157H7/NC_002695.fna "E. coli O157:H7"
mkdir -p bacteria/escColH7
cvs add bacteria/escColH7

~/kent/src/hg/makeDb/bme230.sh theAci1 "Thermoplasma acidophilum" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Thermoplasma_acidophilum/NC_002578.fna "T. acidophilum"
mkdir -p archaea/theAci1
cvs add archaea/theAci1

~/kent/src/hg/makeDb/bme230.sh sulTok1 "Sulfolobus tokodaii" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Sulfolobus_tokodaii/NC_003106.fna "S. tokodaii"
mkdir -p archaea/sulTok1
cvs add archaea/sulTok1

~/kent/src/hg/makeDb/bme230.sh pyrHor1 "Pyrococcus horikoshii" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Pyrococcus_horikoshii/NC_000961.fna "P. horikoshii"
mkdir -p archaea/pyrHor1
cvs add archaea/pyrHor1

~/kent/src/hg/makeDb/bme230.sh geoSul1 "Geobacter sulfurreducens PCA" Bacteria ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Geobacter_sulfurreducens/NC_002939.fna "G. sulfurreducens PCA"
mkdir -p bacteria/geoSul1
cvs add bacteria/geoSul1

# Deinococcus radiodurans R1 appears here, out of list order.
~/kent/src/hg/makeDb/bme230.sh deiRad1 "Deinococcus radiodurans R1" Bacteria ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Deinococcus_radiodurans/NC_001263.fna "D. radiodurans R1"
mkdir -p bacteria/deiRad1
cvs add bacteria/deiRad1

~/kent/src/hg/makeDb/bme230.sh haloSp1 "Halobacterium sp. NRC-1" Archaea ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Halobacterium_sp/NC_002607.fna "Halobacterium sp. NRC-1"
mkdir -p archaea/haloSp1
cvs add archaea/haloSp1

# Deal with multi-chrom species:

# aquAeo1: bring in the extra plasmid ece1 (NC_001880) alongside chr1 and
# install a description page.

cd /cluster/data/aquAeo1
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Aquifex_aeolicus/NC_001880.fna
# Replace every FASTA header line with ">aquAeo1_ece1".
sed -e 's/^>.*/>aquAeo1_ece1/' NC_001880.fna > ece1.fa
rm NC_001880.fna
hgNibSeq aquAeo1 /cluster/data/aquAeo1/nib chr1.fa ece1.fa
# Append the plasmid's size line to the existing chrom.sizes.
faSize -detailed ece1.fa >> chrom.sizes
mkdir html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Aquifex aeolicus VF5</em> Sequence</H3>
<P>
chr1 is the main chromosome and corresponds to <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_000918">NC_000918</a>.  An extra plasmid is ece1 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_001880">NC_001880</a>).
</P>
EOF
ln -s /cluster/data/aquAeo1/html /gbdb/aquAeo1/html
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/aquAeo1/html/description.html" where name="aquAeo1"' | hgsql -h genome-testdb hgcentraltest

# Buchnera sp. APS: correct the organism name in dbDb (the wrong scientific
# name was used when the database was first set up), then add the two
# plasmids pTrp (NC_002252) and pLeu (NC_002253) and a description page.

# NOTE: each "echo ... | hgsql -h genome-testdb hgcentraltest" pipeline must
# stay on ONE line.  If the hgsql arguments are wrapped onto the next line,
# hgsql runs without them and "-h" is executed as a separate command.
echo update dbDb set organism=\"B. aphidicola str. APS\" where name=\"bucBuc1\" | hgsql -h genome-testdb hgcentraltest
cd /cluster/data/bucBuc1
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Buchnera_sp/NC_002252.fna
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Buchnera_sp/NC_002253.fna
# Replace every FASTA header line with ">bucBuc1_<plasmid>".
sed "s/^>.*/>bucBuc1_pTrp/" NC_002252.fna > pTrp.fa
sed "s/^>.*/>bucBuc1_pLeu/" NC_002253.fna > pLeu.fa
rm NC_002253.fna NC_002252.fna
hgNibSeq bucBuc1 /cluster/data/bucBuc1/nib chr1.fa pLeu.fa pTrp.fa
# Append the plasmid sizes to the existing chrom.sizes.
faSize -detailed pLeu.fa >> chrom.sizes
faSize -detailed pTrp.fa >> chrom.sizes
mkdir html
cd html
echo '<H3>About the <em>Buchnera aphidicola str. APS</em> Sequence</H3>
<P>
chr1 is the main chromosome and corresponds to <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002528">NC_002528</a>.  Two extra plasmids are pTrp (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002252">NC_002252</a>), and pLeu (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002253">NC_002253</a>).
</P>' > description.html
ln -s /cluster/data/bucBuc1/html /gbdb/bucBuc1/html
# Point the dbDb row for this database at the new description page.
echo update dbDb set htmlPath=\"/gbdb/bucBuc1/html/description.html\" where name=\"bucBuc1\" | hgsql -h genome-testdb hgcentraltest

# Methanococcus maripaludis S2: single chromosome, so only a description
# page is added.

cd /cluster/data/metMar1/
mkdir html
ln -s /cluster/data/metMar1/html /gbdb/metMar1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Methanococcus maripaludis S2</em> Sequence</H3>
<P>
chr1 is the main chromosome and corresponds to <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_005791">NC_005791</a>.
</P>
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/metMar1/html/description.html" where name="metMar1"' | hgsql -h genome-testdb hgcentraltest

# Bordetella bronchiseptica RB50: single chromosome, so only a description
# page is added.

cd /cluster/data/borBro1/
mkdir html
ln -s /cluster/data/borBro1/html /gbdb/borBro1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Bordetella bronchiseptica RB50</em> Sequence</H3>
<P>
chr1 is the main chromosome and corresponds to <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002927">NC_002927</a>.
</P>
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/borBro1/html/description.html" where name="borBro1"' | hgsql -h genome-testdb hgcentraltest

# Deinococcus radiodurans R1: a second chromosome (NC_001264) and two
# plasmids, MP1 (NC_000958) and CP1 (NC_000959), are added next to chr1.

cd /cluster/data/deiRad1/
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Deinococcus_radiodurans/NC_001264.fna \
     ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Deinococcus_radiodurans/NC_000958.fna \
     ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Deinococcus_radiodurans/NC_000959.fna
# Replace every FASTA header line with ">deiRad1_<sequence name>".
sed -e 's/^>.*/>deiRad1_chr2/' NC_001264.fna > chr2.fa
sed -e 's/^>.*/>deiRad1_MP1/' NC_000958.fna > MP1.fa
sed -e 's/^>.*/>deiRad1_CP1/' NC_000959.fna > CP1.fa
rm *.fna
hgNibSeq deiRad1 /cluster/data/deiRad1/nib chr1.fa chr2.fa MP1.fa CP1.fa
# Append the sizes of the new sequences to the existing chrom.sizes.
{
  faSize -detailed chr2.fa
  faSize -detailed MP1.fa
  faSize -detailed CP1.fa
} >> chrom.sizes
mkdir html
ln -s /cluster/data/deiRad1/html /gbdb/deiRad1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Deinococcus radiodurans R1</em> Sequence</H3>
<P>
chr1 and chr2 are the main chromosomes and correspond to <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_001263">NC_001263</a>, and <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_001264">NC_001264</a> respectively.  Two extra plasmids are
MP1 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_000958">NC_000958</a>) and CP1 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_000959">NC_000959</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/deiRad1/html/description.html" where name="deiRad1"' | hgsql -h genome-testdb hgcentraltest

# Salmonella typhimurium LT2: add the extra plasmid pSLT (NC_003277) and a
# description page.

cd /cluster/data/salTyp1
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Salmonella_typhimurium_LT2/NC_003277.fna
# Replace every FASTA header line with ">salTyp1_pSLT".
sed -e 's/^>.*/>salTyp1_pSLT/' NC_003277.fna > pSLT.fa
rm NC_003277.fna
hgNibSeq salTyp1 /cluster/data/salTyp1/nib chr1.fa pSLT.fa
# Append the plasmid's size line to the existing chrom.sizes.
faSize -detailed pSLT.fa >> chrom.sizes
mkdir html
ln -s /cluster/data/salTyp1/html /gbdb/salTyp1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Salmonella typhimurium LT2</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_003197">NC_003197</a>) and pSLT is an extra plasmid (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_003277">NC_003277</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/salTyp1/html/description.html" where name="salTyp1"' | hgsql -h genome-testdb hgcentraltest

# Methanopyrus kandleri AV19: single chromosome, so only a description page
# is added.

cd /cluster/data/metKan1
mkdir html
ln -s /cluster/data/metKan1/html /gbdb/metKan1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Methanopyrus kandleri AV19</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_003551">NC_003551</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/metKan1/html/description.html" where name="metKan1"' | hgsql -h genome-testdb hgcentraltest

# Aeropyrum pernix: single chromosome, so only a description page is added.

cd /cluster/data/aerPer1
mkdir html
ln -s /cluster/data/aerPer1/html /gbdb/aerPer1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Aeropyrum pernix</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_000854">NC_000854</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/aerPer1/html/description.html" where name="aerPer1"' | hgsql -h genome-testdb hgcentraltest

# Escherichia coli O157:H7: one chromosome plus two plasmids, pO157
# (NC_002128) and pOSAK1 (NC_002127).

cd /cluster/data/escColH7
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Escherichia_coli_O157H7/NC_002128.fna \
     ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Escherichia_coli_O157H7/NC_002127.fna
# Replace every FASTA header line with ">escColH7_<plasmid>".
sed -e 's/^>.*/>escColH7_pO157/' NC_002128.fna > pO157.fa
sed -e 's/^>.*/>escColH7_pOSAK1/' NC_002127.fna > pOSAK1.fa
rm *.fna
hgNibSeq escColH7 /cluster/data/escColH7/nib chr1.fa pO157.fa pOSAK1.fa
# Append the plasmid sizes to the existing chrom.sizes.
{
  faSize -detailed pO157.fa
  faSize -detailed pOSAK1.fa
} >> chrom.sizes
mkdir html
ln -s /cluster/data/escColH7/html /gbdb/escColH7/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Escherichia coli O157:H7</em> Sequence</H3>
<P>
This is the E. coli that makes you sick.  chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002695">NC_002695</a>).
In addition there are two plasmids: pOSAK1 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002127">NC_002127</a>) and pO157 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002128">NC_002128</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/escColH7/html/description.html" where name="escColH7"' | hgsql -h genome-testdb hgcentraltest

# Thermoplasma acidophilum: single chromosome, so only a description page is
# added.

cd /cluster/data/theAci1
mkdir html
ln -s /cluster/data/theAci1/html /gbdb/theAci1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Thermoplasma acidophilum</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002578">NC_002578</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/theAci1/html/description.html" where name="theAci1"' | hgsql -h genome-testdb hgcentraltest

# Sulfolobus tokodaii: single chromosome, so only a description page is
# added.

cd /cluster/data/sulTok1
mkdir html
ln -s /cluster/data/sulTok1/html /gbdb/sulTok1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Sulfolobus tokodaii</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_003106">NC_003106</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/sulTok1/html/description.html" where name="sulTok1"' | hgsql -h genome-testdb hgcentraltest

# Pyrococcus horikoshii: single chromosome, so only a description page is
# added.

cd /cluster/data/pyrHor1
mkdir html
ln -s /cluster/data/pyrHor1/html /gbdb/pyrHor1/html
cd html
cat > description.html <<'EOF'
<H3>About the <em>Pyrococcus horikoshii</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_000961">NC_000961</a>).
EOF
# Point the dbDb row for this database at the new description page.
printf '%s\n' 'update dbDb set htmlPath="/gbdb/pyrHor1/html/description.html" where name="pyrHor1"' | hgsql -h genome-testdb hgcentraltest

# Geobacter sulfurreducens PCA: single chromosome, so only a description
# page is added.

cd /cluster/data/geoSul1
mkdir html
ln -s /cluster/data/geoSul1/html /gbdb/geoSul1/html
cd html
# The accession must be NC_002939, the sequence this database was built
# from.  NC_000961 is the Pyrococcus horikoshii chromosome.
echo '<H3>About the <em>Geobacter sulfurreducens PCA</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002939">NC_002939</a>).' > description.html
# Point the dbDb row for this database at the new description page.
echo update dbDb set htmlPath=\"/gbdb/geoSul1/html/description.html\" where name=\"geoSul1\" | hgsql -h genome-testdb hgcentraltest

# Halobacterium sp. NRC-1: add the two extra plasmids pNRC100 (NC_001869)
# and pNRC200 (NC_002608) and a description page.

cd /cluster/data/haloSp1
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Halobacterium_sp/NC_001869.fna
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Halobacterium_sp/NC_002608.fna
# Replace every FASTA header line with ">haloSp1_<plasmid>".
sed 's/^>.*/>haloSp1_pNRC100/' NC_001869.fna > pNRC100.fa
sed 's/^>.*/>haloSp1_pNRC200/' NC_002608.fna > pNRC200.fa
rm *.fna
hgNibSeq haloSp1 /cluster/data/haloSp1/nib chr1.fa pNRC100.fa pNRC200.fa
# Append the plasmid sizes to the existing chrom.sizes.
faSize -detailed pNRC100.fa >> chrom.sizes
faSize -detailed pNRC200.fa >> chrom.sizes
mkdir html
ln -s /cluster/data/haloSp1/html /gbdb/haloSp1/html
cd html
# The pNRC200 anchor uses a single href attribute; a doubled
# href="href="..." breaks the link.
echo '<H3>About the <em>Halobacterium sp. NRC-1</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002607">NC_002607</a>).
There are two more plasmids: pNRC100 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_001869">NC_001869</a>) and pNRC200 (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_002608">NC_002608</a>).' > description.html
# Point the dbDb row for this database at the new description page.
echo update dbDb set htmlPath=\"/gbdb/haloSp1/html/description.html\" where name=\"haloSp1\" | hgsql -h genome-testdb hgcentraltest

# So now there's a problem with "make alpha".  To avoid it, create empty
# trackDb.ra files in the clade directories (bacteria/, archaea/) and in
# every per-database directory, and add them all to CVS.

cd ~/kent/src/hg/makeDb/trackDb
# mkdir -p: bacteria/ was already created during the initial setup, and a
# plain mkdir would fail on the existing directory.
mkdir -p bacteria
cvs add bacteria
for i in aquAeo1 bucBuc1 salTyp1 escColH7 deiRad1 borBro1 geoSul1; do
   mkdir -p "bacteria/$i"
   touch "bacteria/$i/trackDb.ra"
   cvs add "bacteria/$i"
   cvs add "bacteria/$i/trackDb.ra"
done
touch bacteria/trackDb.ra
cvs add bacteria/trackDb.ra

mkdir -p archaea
cvs add archaea
for i in haloSp1 pyrHor1 sulTok1 theAci1 aerPer1 metMar1 metKan1; do
   mkdir -p "archaea/$i"
   touch "archaea/$i/trackDb.ra"
   cvs add "archaea/$i"
   cvs add "archaea/$i/trackDb.ra"
done
touch archaea/trackDb.ra
cvs add archaea/trackDb.ra

# 04-08-2004: need to add Synechococcus sp. WH 8102 (single chromosome,
# NC_005070): run the setup script, create the trackDb directory with an
# empty trackDb.ra, and add a description page.

cd ~/kent/src/hg/makeDb/trackDb
~/kent/src/hg/makeDb/bme230.sh synSp1 "Synechococcus sp. WH 8102" Bacteria ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Synechococcus_sp_WH8102/NC_005070.fna "Synechococcus sp. WH 8102"
mkdir -p bacteria/synSp1
cvs add bacteria/synSp1
touch bacteria/synSp1/trackDb.ra
# The empty trackDb.ra has to be added to CVS too, as is done for the other
# databases; otherwise it exists only in this checkout.
cvs add bacteria/synSp1/trackDb.ra
cd /cluster/data/synSp1
mkdir html
ln -s /cluster/data/synSp1/html /gbdb/synSp1/html
cd html
echo '<H3>About the <em>Synechococcus sp. WH 8102</em> Sequence</H3>
<P>
chr1 is the main chromosome (<a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NC_005070">NC_005070</a>).' > description.html
# Point the dbDb row for this database at the new description page.
echo update dbDb set htmlPath=\"/gbdb/synSp1/html/description.html\" where name=\"synSp1\" | hgsql -h genome-testdb hgcentraltest
