#!/usr/bin/perl
#
# dump-test-tables db outdir
#
# Dump tables from a test database in a form that can be diffed.  This removes
# variant fields, such as ids and timestamps and sorts the results.  This can
# be used to compare the results of loading in different ways, such as loading
# all available updates, with incrementally obtained updates.  
# 
use strict;
use warnings;
use File::Basename;
use FindBin;
use lib "$FindBin::Bin/../../lib";
use gbCommon;

setTaskName("dump-test-tables");

# command to run mysql
my $MYSQL = "hgu-sql -N";

# run mysql and output results to a file
sub runMySql($$) {
    my($outFile, $sqlCmd) = @_;
    runProg("$MYSQL -e '$sqlCmd' $main::db > $outFile");
}

# run mysql and return results as a list of lines
sub callMySql($) {
    my($sqlCmd) = @_;
    my $out = callProg("$MYSQL -e '$sqlCmd' $main::db");
    chomp($out);
    return split('\n', $out);
}

if ($#ARGV != 1) {
    die("wrong # args: dump-test-tables db outdir");
    }
$main::db = $ARGV[0];
$main::outdir = $ARGV[1];
my $tbldir = "$main::outdir/tbls";
makeDir($tbldir);

# save load directory
if (-e "load") {
    removeDir("$main::outdir/load");
    renameFile("load", "$main::outdir/load");
}

my @chroms = callMySql("select chrom from chromInfo");
push(@chroms, "all");

my %tables;
foreach my $table (callMySql("show tables")) {
    $tables{$table} = 1;
}

# Used to generate psl chromosome (and all) table names. Tables existance is
# check, so this can be a super-set.
my @pslTables;
foreach my $type ("mrna", "est", "intronEst") {
    foreach my $chr (@chroms) {
        push(@pslTables, $chr . "_" . $type);
    }
}
push(@pslTables, "xenoMrna");
push(@pslTables, "xenoEst");
foreach my $chr (@chroms) {
    push(@pslTables, $chr . "_" . "intronEst");
}

foreach my $tbl (@pslTables) {
    if (defined($tables{$tbl})) {
        runMySql("$tbldir/$tbl.tbl",
                 "SELECT * FROM $tbl ORDER BY "
                     . "tName,tStart,tEnd,qName,qStart,qEnd");
    }
}

# *OrientInfo tables
foreach my $type ("mrna", "est") {
    my $tbl = "${type}OrientInfo";
    if (defined($tables{$tbl})) {
        runMySql("$tbldir/$tbl.tbl", 
             "SELECT * FROM $tbl ORDER BY chrom,chromStart,chromEnd,name");
    }
}

# get contents of mrna tables, joining the values in the unique name table
my $mrnaSel =
    "SELECT gbCdnaInfo.acc, gbCdnaInfo.version, gbCdnaInfo.moddate, gbCdnaInfo.type, gbCdnaInfo.direction,"
    . "        author.name AS author, cds.name AS cds, cell.name AS cell,"
    . "        description.name AS description, "
    . "        development.name AS development, geneName.name AS geneName,"
    . "        keyword.name AS keyword, library.name AS library, "
    . "        mrnaClone.name as mrnaClone, organism.name AS organism,"
    . "        productName.name AS productName, sex.name AS sex, "
    . "        source.name AS source, tissue.name AS tissue "
    . " FROM gbCdnaInfo,author,cds,cell,description,development,geneName,keyword,"
    . "      library,mrnaClone,organism,productName,sex,source,tissue"
    . " WHERE gbCdnaInfo.author=author.id AND gbCdnaInfo.cds=cds.id"
    . "       AND gbCdnaInfo.cell=cell.id AND gbCdnaInfo.description=description.id"
    . "       AND gbCdnaInfo.development=development.id AND gbCdnaInfo.geneName=geneName.id"
    . "       AND gbCdnaInfo.keyword=keyword.id AND gbCdnaInfo.library=library.id"
    . "       AND gbCdnaInfo.mrnaClone=mrnaClone.id AND gbCdnaInfo.organism=organism.id"
    . "       AND gbCdnaInfo.productName=productName.id AND gbCdnaInfo.sex=sex.id"
    . "       AND gbCdnaInfo.source=source.id AND gbCdnaInfo.tissue=tissue.id"
    . "       ORDER BY acc";

runMySql("$tbldir/gbCdnaInfo.tbl", $mrnaSel);

# seq dropping any file specific info (which might be differed)
runMySql("$tbldir/gbSeq.tbl",
         "SELECT acc,version,size,file_size,type,srcDb FROM gbSeq ORDER BY acc");

# this always differs, save with no compare esxtension
runMySql("$tbldir/gbSeq.nctbl",
         "SELECT * FROM gbSeq ORDER BY acc");
runMySql("$tbldir/gbExtFile.nctbl",
         "SELECT * FROM gbExtFile ORDER BY id");
runMySql("$tbldir/gbSeqExtFile.nctbl",
         "SELECT acc,gbSeq.id,path,gbExtFile.id FROM gbSeq,gbExtFile WHERE gbSeq.gbExtFile = gbExtFile.id ORDER BY acc");

# gbStatus, save select data for diff, plus full table
runMySql("$tbldir/gbStatus.tbl",
         "SELECT acc,version,modDate,type,srcDb,numAligns FROM gbStatus ORDER BY acc");
runMySql("$tbldir/gbStatus.nctbl",
         "SELECT * FROM gbStatus ORDER BY acc");

# refseq-specific tables
my $refLinkSel =
    "SELECT refLink.name,refLink.product,refLink.mrnaAcc,refLink.protAcc,"
    . "       geneName.name,productName.name,refLink.locusLinkId,refLink.omimId "
    . "FROM refLink,geneName,productName "
    . "WHERE refLink.geneName=geneName.id and refLink.prodName=productName.id "
    . "ORDER BY refLink.mrnaAcc";
    
runMySql("$tbldir/refLink.tbl", $refLinkSel);

runMySql("$tbldir/refSeqStatus.tbl",
         "SELECT mrnaAcc,status FROM refSeqStatus ORDER BY mrnaAcc");
runMySql("$tbldir/refSeqSummary.tbl",
         "SELECT mrnaAcc,completeness,summary FROM refSeqSummary ORDER BY mrnaAcc");

runMySql("$tbldir/refSeqAli.tbl",
         "SELECT * FROM refSeqAli ORDER BY tName,tStart,tEnd,qName,qStart,qEnd");
runMySql("$tbldir/refGene.tbl",
         "SELECT * FROM refGene ORDER BY name,chrom,txStart,txEnd");
runMySql("$tbldir/refFlat.tbl",
         "SELECT * FROM refFlat ORDER BY geneName,name,chrom,txStart,txEnd");

if (defined($tables{xenoRefSeqAli})) {
    runMySql("$tbldir/xenoRefSeqAli.tbl",
             "SELECT * FROM xenoRefSeqAli ORDER BY tName,tStart,tEnd,qName,qStart,qEnd");
    runMySql("$tbldir/xenoRefGene.tbl",
             "SELECT * FROM xenoRefGene ORDER BY name,chrom,txStart,txEnd");
    runMySql("$tbldir/xenoRefFlat.tbl",
             "SELECT * FROM xenoRefFlat ORDER BY geneName,name,chrom,txStart,txEnd");
}

runMySql("$tbldir/gbLoaded.tbl",
         "SELECT srcDb,loadRelease,loadUpdate,type,accPrefix FROM gbLoaded ORDER BY srcDb,loadRelease,loadUpdate,type,accPrefix");

runMySql("$tbldir/imageClone.tbl",
         "SELECT imageId,acc,type FROM imageClone ORDER BY imageId,acc");

