# MULTIZ with other methanogens
# DONE (10/15/05), kpollard

    cd /cluster/data/methBurt1/bed
    mkdir conservation
    cd conservation
    cp /cluster/data/methBark1/bed/conservation/HoxD55.q .
    cp /cluster/data/methBark1/bed/conservation/*.nib .
    cp /cluster/data/methBark1/bed/conservation/*.chr .
    cp /cluster/data/methBark1/bed/conservation/methBark1.fa .
    cp /cluster/data/methBark1/bed/conservation/methBurt1.fa .
    faToTwoBit methBark1.fa methBark1.2bit
    faToTwoBit methBurt1.fa methBurt1.2bit
    sed s/chr/methBark1.chr/ methBark1.fa > temp
    sed s/plas/methBark1.plas/ temp > methBark1.fa
    rm temp
	
    #chrom sizes
    faSize -detailed *.chr *.fa > chrom.sizes
    
    #split methBurt1.fa
    faSplit byname methBurt1.fa ./
    foreach f (methBurt1.Contig*.fa)
	echo `basename $f .fa` >> contigs.txt
    end

    #blastz (run on kolossus)
    ssh kolossus	
    cd /cluster/data/methBurt1/bed/conservation/
    foreach f (`cat contigs.txt`)
	echo $f
	blastz $f.fa methBark1.fa Q=HoxD55.q > ${f}-methBark1.lav
	blastz $f.fa methMaze1.chr Q=HoxD55.q > ${f}-methMaze1.lav
	blastz $f.fa metAce1.chr Q=HoxD55.q > ${f}-metAce1.lav
    end
	 
    foreach f (`cat contigs.txt`)
	echo $f
	echo "lavToAxt"
	lavToAxt ${f}-methBark1.lav methBurt1.2bit methBark1.2bit ${f}-methBark1.axt
	lavToAxt ${f}-methMaze1.lav methBurt1.2bit . ${f}-methMaze1.axt
	lavToAxt ${f}-metAce1.lav methBurt1.2bit . ${f}-metAce1.axt
	echo "axtBest"
	axtBest ${f}-methBark1.axt all -winSize=500 -minScore=5000  ${f}-methBark1-best.axt
	axtBest ${f}-methMaze1.axt all -winSize=500 -minScore=5000  ${f}-methMaze1-best.axt
	axtBest ${f}-metAce1.axt all -winSize=500 -minScore=5000  ${f}-metAce1-best.axt
	echo "axtToMaf"
	axtToMaf ${f}-methBark1-best.axt chrom.sizes chrom.sizes ${f}-methBark1.maf
	axtToMaf ${f}-methMaze1-best.axt chrom.sizes chrom.sizes ${f}-methMaze1.maf
	axtToMaf ${f}-metAce1-best.axt chrom.sizes chrom.sizes ${f}-metAce1.maf
    end

    #multiz
    foreach f(*.maf)
	cat $f | gawk 'BEGIN{getline; print $0; getline; getline; getline; getline;}{print $0;}' > methBurt
	mv methBurt $f
    end

    foreach f(`cat contigs.txt`)
	echo $f
	multiz ${f}-methBark1.maf ${f}-methMaze1.maf - > ${f}-methBark1-methMaze1.maf
	multiz ${f}-metAce1.maf ${f}-methBark1-methMaze1.maf - >  ${f}-methBark1-methMaze1-metAce1.maf
    end

    #get rid of MAF files with no data
    foreach f(`cat contigs.txt`)
	wc -l ${f}-methBark1-methMaze1-metAce1.maf | gawk '{if($1<4){print "rm "$2}}' >> rmjobs
    end
    chmod +x rmjobs
    rmjobs

    #phyloHMM
    foreach ff(*-methBark1-methMaze1-metAce1.maf)
        set f=`basename $ff -methBark1-methMaze1-metAce1.maf`
	echo $f
	msa_view -i MAF -M $f.fa -o SS $ff > $f.ss
	cat $f.ss | gawk '{if(/^NAMES/){print "NAMES = methBurt1,methBark1,methMaze1,metAce1";} else{print $0;}}' > methBurt
	mv methBurt $f.ss
	phyloFit -i SS $f.ss -t "(metAce1,(methBurt1,(methBark1,methMaze1)))" -o ${f}_MbaMaMmMbu
    end
    
    #Contig57 has the largest NTUPLES (1071) so use it for starting mod
    # it shows GC=0.212928+0.220304=0.433232
    #add GC content to next call
    foreach f(*.ss)
	set b=$f:t:r
	echo $b
	phastCons $f methBurt1.Contig57_MbaMaMmMbu.mod \
	--gc 0.4332 --target-coverage 0.7 --estimate-trees ${b} \
	--expected-lengths 25 --no-post-probs --ignore-missing \
	--nrates 1,1
    end

    cat contigs.txt | gawk '/Contig29/{getline;}{print substr($1,11,length($1));}' > cont.txt
    foreach b(`cat cont.txt`)
	echo $b
	phastCons methBurt1.$b.ss \
	methBurt1.Contig57.cons.mod,methBurt1.Contig57.noncons.mod \
	--target-coverage 0.7 --expected-lengths 25 \
	--viterbi ${b}_methBurt1-elements.bed --score \
	--require-informative 0 --seqname $b > ${b}_cons.dat
	wigEncode ${b}_cons.dat ${b}_phastCons.wig ${b}_phastCons.wib
    end

    #combine phastCons elements into 1 bed file
    cat *elements.bed > phastCons.bed

    #move data
    mkdir wib
    mv Contig*_phastCons.wib wib/.
    mv Contig*_phastCons.wig wib/.
    mkdir /gbdb/methBurt1/wib
    ln -s /cluster/data/methBurt1/bed/conservation/wib/*.wib /gbdb/methBurt1/wib
    mkdir /gbdb/methBurt1/pwMaf
    mkdir -p otherSpp/methBark1 otherSpp/methMaze1 otherSpp/metAce1
    foreach f(`cat cont.txt`)
	echo $f
	mv methBurt1.${f}-methBark1.maf otherSpp/methBark1/$f.maf
	mv methBurt1.${f}-methMaze1.maf otherSpp/methMaze1/$f.maf
	mv methBurt1.${f}-metAce1.maf otherSpp/metAce1/$f.maf
    end
    ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/methBark1 /gbdb/methBurt1/pwMaf/methBark1_pwMaf
    ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/metAce1 /gbdb/methBurt1/pwMaf/metAce1_pwMaf
    ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/methMaze1 /gbdb/methBurt1/pwMaf/methMaze1_pwMaf
    mkdir multiz
    foreach f(`cat cont.txt`)
	echo $f
	mv methBurt1.${f}-methBark1-methMaze1-metAce1.maf multiz/$f.maf
    end
    ln -s /cluster/data/methBurt1/bed/conservation/multiz /gbdb/methBurt1/multizMbuMbaMmMa

    #load
    hgLoadWiggle methBurt1 phastCons /cluster/data/methBurt1/bed/conservation/wib/*_phastCons.wig
    hgLoadMaf -warn methBurt1 multizMbuMbaMmMa
    hgLoadMaf -warn methBurt1 methBark1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/methBark1_pwMaf
    hgLoadMaf -warn methBurt1 metAce1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/metAce1_pwMaf
    hgLoadMaf -warn methBurt1 methMaze1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/methMaze1_pwMaf
    hgLoadBed methBurt1 phastConsElements phastCons.bed 

    #trackDb
    cd ~/kent/src/hg/makeDb/trackDb/archae
    mkdir methBurt1
    cvs add methBurt1
    cd methBurt1
    #trackDb.ra entry
    # track multizMbuMbaMmMa
    # shortLabel Conservation
    # longLabel Methanogen multiz alignments
    # group compGeno
    # priority 10.0
    # visibility pack
    # type wigMaf 0.0 1.0
    # maxHeightPixels 100:40:11
    # wiggle phastCons
    # yLineOnOff Off
    # autoScale Off
    # pairwise pwMaf
    # speciesOrder methBark1 methMaze1 metAce1
    cvs add trackDb.ra
    cvs commit -m "New multiz track" trackDb.ra
    #html page
    cvs add multizMbuMbaMmMa.html
    cvs commit -m "Details page for multiz track" multizMbuMbaMmMa.html
