#!/bin/bash
# Shell options: -b report background-job termination immediately, -e exit
# when a command fails, -E let functions and subshells inherit any ERR trap,
# -u treat unset variables as errors, pipefail make a pipeline fail if any
# of its stages fails.
set -beEu -o pipefail
###############################################################################
#  doGenbankTests
#
#  June 2008 -- Brooke Rhead
#
#  Wrapper script for automated checking of GenBank tables.
#  Note: this script assumes the kent source tree is checked out in ~/kent/src
# 
###############################################################################

#######################################
# Report whether a table is present in a database.
# Arguments: $1 - database name
#            $2 - table name; it is placed in a SQL LIKE clause, so `_` and
#                 `%` in the name act as wildcards
# Returns:   0 if hgsql lists at least one matching table, 1 otherwise
#######################################
tableExists() {
  local db=$1 table=$2
  local res
  # A failed query is deliberately treated like "no such table": the status
  # is ignored and only the (then empty) output decides the result.
  res=$(hgsql -Ne "show tables like '$table'" "$db") || true
  [[ -n $res ]]
}

#######################################
# Report whether a database is known to the server.
# Arguments: $1 - database name, placed in a SQL LIKE clause
# Returns:   0 if hgsql lists at least one matching database, 1 otherwise
#######################################
databaseExists() {
  local name=$1
  local matches
  # The query status is ignored; only its output decides the answer.
  matches=$(hgsql -Ne "show databases like '$name'") || true
  test -n "$matches" && return 0
  return 1
}

#######################################
# Query trackDb for the shortLabel of one table.
# Arguments: $1 - database name
#            $2 - value to match against trackDb.tableName
# Outputs:   whatever hgsql prints for the query (empty if no row matches)
# Returns:   the exit status of hgsql
#######################################
selectShortLabel() {
  local db=$1 table=$2
  hgsql -NBe "select shortLabel from trackDb where tableName='$table'" "$db"
}

#######################################
# Print the trackDb shortLabel for a table.
# When all_mrna or all_est has no label of its own, the label is looked up
# again under the name mrna or est respectively.
# Arguments: $1 - database name
#            $2 - table name
# Outputs:   the label on one line, verbatim (an empty line if none is found)
#######################################
getShortLabel() {
  local db=$1 table=$2
  local res fallback=
  # Lookup status is deliberately ignored: an empty result simply triggers
  # the fallback below.
  res=$(selectShortLabel "$db" "$table") || true
  if [[ -z $res ]]; then
    case $table in
      all_mrna) fallback=mrna ;;
      all_est) fallback=est ;;
    esac
    if [[ -n $fallback ]]; then
      res=$(selectShortLabel "$db" "$fallback")
    fi
  fi
  # printf with a quoted argument keeps the label intact; an unquoted echo
  # would glob-expand characters such as `*` and collapse whitespace.
  printf '%s\n' "$res"
}

# keep track of errors vs. no errors
allOk=yes

# check host, set path to genbank sanity location
# HOST is not a variable bash sets on its own (bash provides HOSTNAME), so
# fall back to HOSTNAME with any domain part stripped.  Without this, an
# unset HOST aborts under `set -u` with "unbound variable" instead of
# reaching the message below.  HOST is assigned so later uses see it too.
HOST=${HOST:-${HOSTNAME%%.*}}
case $HOST in
  hgwdev)
    sanityPath="/cluster/data/genbank"
    ;;
  hgwbeta)
    sanityPath="/genbank"
    ;;
  *)
    printf '\n%s\n\n' "This script must be run from hgwdev or hgwbeta." >&2
    exit 1
    ;;
esac

# get arguments / give usage statement
# Exactly one or two arguments are accepted: the database, then an optional
# output file.
if (( $# < 1 || $# > 2 )); then
  printf '\n usage:  %s db [outFile]\n where outFile is the name of a file in which to record results\n\n' \
    "$(basename "$0")" >&2
  exit 1
fi

db=$1
if ! databaseExists "$db"; then
  echo "ERROR: $db does not seem to be a valid database name" >&2
  exit 1
fi

if (( $# == 2 )); then
  # From here on, everything the script writes to stdout or stderr goes
  # into the requested output file.
  exec > "$2" 2>&1
fi

# specify tables that should get each test
# Each list is a whitespace-separated string that the loops below expand
# unquoted, so only the words matter, not the line breaks between them.
# intronEst is a special case; it may exist only in split form, so collect
# every table whose name ends in intronEst.
intronEstTables=$(hgsql -Ne "show tables like '%intronEst'" "$db")

trackTables="refGene xenoRefGene mgcFullMrna orfeomeMrna all_mrna intronEst
           all_est xenoMrna xenoEst"
genePredTables="refGene xenoRefGene mgcGenes orfeomeGenes"
pslTables="mgcFullMrna orfeomeMrna all_mrna xenoMrna all_est refSeqAli
           xenoRefSeqAli xenoEst $intronEstTables"
featureTables="refGene xenoRefGene mgcGenes mgcFullMrna orfeomeGenes
               orfeomeMrna all_mrna intronEst all_est xenoMrna xenoEst"

# print information about the script at the top of the output
echo "$(basename "$0") output on $db"
date
echo

# get a list of all genbank tracks present in this assembly
printf '%s\n\n' "The following GenBank tracks are present in this assembly:"
# $trackTables is expanded unquoted on purpose: it is a whitespace-separated
# list of table names.
for table in $trackTables; do
  if tableExists "$db" "$table"; then
    getShortLabel "$db" "$table"
  fi
done
# intronEst is a special case; it may exist only in split form.  If there is
# no plain intronEst table but split tables were found, still print the
# label.
if ! tableExists "$db" intronEst && [[ -n $intronEstTables ]]; then
  getShortLabel "$db" intronEst
fi
echo

# run genePredChecks and pslChecks
# Each command is held in an array so its arguments reach the program
# exactly as written; the echoed line is the array joined with spaces.
# A failing check only flips allOk, so the remaining tables are still
# checked.
printf '%s\n\n' "--> Running genePredCheck and pslCheck:"
for table in $genePredTables; do
  if tableExists "$db" "$table"; then
    checkCmd=(nice genePredCheck "-db=$db" "$table")
    echo "${checkCmd[*]}"
    "${checkCmd[@]}" || allOk=no
  fi
done
echo
for table in $pslTables; do
  if tableExists "$db" "$table"; then
    checkCmd=(nice pslCheck "-db=$db" "$table")
    echo "${checkCmd[*]}"
    "${checkCmd[@]}" || allOk=no
  fi
done
echo

# run joinerCheck
joinerLoc=~/kent/src/hg/makeDb/schema/all.joiner
printf '%s\n\n' "--> Checking appropriate keys in all.joiner:"

#######################################
# Run `joinerCheck -keys` for one identifier if its table exists.
# Globals:   db, joinerLoc (read); allOk (set to "no" if joinerCheck fails)
# Arguments: $1 - table whose presence decides whether the check runs
#            $2 - identifier name passed to joinerCheck via -identifier
# Outputs:   the command line, the command's own output, then a blank line;
#            nothing when the table is absent
# Returns:   always 0, so a skipped or failed check never stops the script
#######################################
doJoinerCheck() {
  local table=$1 ident=$2
  # Local array: keeps the command out of global scope and keeps each
  # argument intact even if it contains spaces.
  local -a cmd
  if tableExists "$db" "$table"; then
    cmd=(nice joinerCheck -keys "-database=$db" "-identifier=$ident" "$joinerLoc")
    echo "${cmd[*]}"
    "${cmd[@]}" || allOk=no
    echo
  fi
  return 0
}
doJoinerCheck gbCdnaInfo gbCdnas
doJoinerCheck refGene refSeqId
doJoinerCheck xenoRefGene xenoRefSeqId
doJoinerCheck mgcGenes mgcAccession
doJoinerCheck orfeomeGenes orfeomeAccession
doJoinerCheck all_mrna nativeMrnaAccession
doJoinerCheck all_est nativeEstAccession # takes care of intronEst, too
doJoinerCheck xenoMrna xenoMrnaAccession
doJoinerCheck xenoEst xenoEstAccession
echo

# run featureBits
# For every feature table that exists, featureBits is run twice: once on the
# table alone and once with `gap` appended as an extra argument.  The
# command lives in an array so its arguments are passed exactly as written.
printf '%s\n\n' "--> Running featureBits.  Remember to paste this into the pushqueue:"
for table in $featureTables; do
  if tableExists "$db" "$table"; then
    bitsCmd=(nice featureBits -countGaps "$db" "$table")
    echo "${bitsCmd[*]}"
    "${bitsCmd[@]}" || allOk=no
    bitsCmd+=(gap)
    echo "${bitsCmd[*]}"
    "${bitsCmd[@]}" || allOk=no
    echo
  fi
done
echo

# run gbSanity
# The output file name is <db>.sanity.<date stamp>, written under
# $sanityPath/misc.
filename=${db}.sanity.$(date +%m-%d-%Y.%R%p)
printf '%s\n' "--> Running gbSanity, and putting output in:"
printf '%s\n' "${HOST}:$sanityPath/misc/$filename"
# need to run in a subshell, since gbSanity wants to be run from this dir
(cd "$sanityPath" && bin/x86_64/gbSanity "$db" > "misc/$filename" 2>&1) || allOk=no
printf '%s\n' \
  "The last line of the gbSanity output gives a summary of errors. " \
  "Here is the last line of the file. If there are errors, consult Mark Diekhans" \
  "and Brian Raney:" \
  ""
# If the step above failed before creating the file, tail fails too; record
# that as an error instead of letting `set -e` end the script before the
# final report is printed.
tail -n 1 "$sanityPath/misc/$filename" || allOk=no
echo

# run countPerChrom??

# give overall report of errors
# Exit status: 0 when every test passed, 2 when at least one test reported
# an error.
if [[ $allOk == yes ]]; then
  echo "No errors were encountered during $(basename "$0"). Yay!"
  date
  exit 0
fi
echo "There was an ERROR in at least one test, but $(basename "$0") ran successfully."
date
exit 2
