#!/usr/bin/perl
#
# gbCopyStep [options] buildServer buildRoot db1 [db2 ...]
#
# rsync data/processed and data/aligned files from the build server to a local
# directory available to systems to be updated.  Update the /gbdb/ directory
# with fasta files.  Must be run in the local gbRoot directory.  This gets the
# build.time file before locking and will exit without any messages if
# build.time has not changed.
#
# Options:
#   -rsyncPort=port - rsync port to use, or `rsh' or `ssh'. Defaults to ssh.
#   -gbdb=dir - Use this directory for /gbdb (useful in debugging), set to
#    empty or "no" to disable link into /gbdb.
#   -checksum - use rsync checksumming
#   -verbose
# Arguments:
#   - buildServer - host with build files.
#   - buildRoot - gbRoot directory on buildServer
#   - db1 [db2 ...] - list of databases to update
#
# $Id: gbCopyStep,v 1.18 2009/05/21 04:23:12 markd Exp $
#
use strict;
use warnings;
use File::Basename;
use FindBin;
use lib "$FindBin::Bin/../lib";
use gbCommon;

# When true (set by the -checksum option), rsyncBuildFiles adds -c to the
# rsync command line.
my $checksum = 0;

# Fetch the build server's build.time file with rsync into a temporary file
# under var/copy whose name includes the host name and process id.  Returns
# the path of the temporary file, or undef (after printing a warning to
# stderr) if the rsync fails.
sub getBuildTimeFile($$$) {
    my($rsyncPort, $buildServer, $buildRoot) = @_;

    my $localTmp = "var/copy/build.time.$gbCommon::hostName.$$.tmp";
    makeFileDir($localTmp);

    my $remotePath = "$buildRoot/var/build/build.time";
    my $portOpt = getRSyncPortOpt($rsyncPort);
    my $rsyncCmd = "rsync $portOpt $buildServer:$remotePath  $localTmp";

    # success is the common case; hand back the freshly copied file
    if (runProgNoAbort($rsyncCmd) == 0) {
        return $localTmp;
    }
    print STDERR "Warning: can't rsync $buildServer:$remotePath, will try again later\n";
    return undef;
}

# Compare build times.  Returns true if the build described by $buildTimeTmp
# is newer than the one recorded in $lastBuildTime.  If $lastBuildTime does
# not exist, nothing has been copied yet, so the build counts as newer.
sub checkBuildTime($$) {
    my($lastBuildTime, $buildTimeTmp) = @_;
    # first copy: there is no recorded build time to compare against
    return 1 if (! -e $lastBuildTime);
    return (loadTimeFile($buildTimeTmp) > loadTimeFile($lastBuildTime));
}


# Check if the $gbdbDir/genbank/data/processed directory is on the same file
# system as the local data/processed directory.  The gbdb directory is
# created first if it doesn't exist.  Returns true if both directories have
# the same device number.  Calls gbError if either directory can't be
# stat-ed.
sub checkIfGbdbLinkable($) {
    my($gbdbDir) = @_;
    my $processedDir = "data/processed";
    my $gbdbProcessedDir = "$gbdbDir/genbank/data/processed";
    makeDir($gbdbProcessedDir);

    # stat returns an empty list on failure; without these checks both device
    # numbers would be undef and would compare as equal.
    my($processedDev) = stat($processedDir);
    if (!defined($processedDev)) {
        gbError("can't stat $processedDir: $!");
    }
    my($gbdbProcessedDev) = stat($gbdbProcessedDir);
    if (!defined($gbdbProcessedDev)) {
        gbError("can't stat $gbdbProcessedDir: $!");
    }

    return ($processedDev == $gbdbProcessedDev);
}

# Populate $gbdbDir/genbank with links to every *.fa file found under
# data/processed, then make the directories below $gbdbDir/genbank/data
# readable and searchable by everyone.
sub linkProcessedFastas($) {
    my($gbdbDir) = @_;
    my $gbdbGenbank = "$gbdbDir/genbank";

    # cpio pass-through mode with -l makes links instead of copies
    my $linkCmd = "find data/processed -name '*.fa' | cpio -pduml $gbdbGenbank";
    runPipe($linkCmd);

    # directories created by cpio don't get their permissions set; fix them
    my $chmodCmd = "find $gbdbGenbank/data -follow -type d | xargs chmod a+rx";
    runPipe($chmodCmd);
}

# rsync etc/ignore.idx and the processed and aligned files from the build
# server into the current directory.
#   - rsyncPort - passed to getRSyncPortOpt to build the rsync port option
#   - buildServer, buildRoot - host and gbRoot directory to copy from
#   - gbdbDir - directory under which processed fastas are linked; "no" or
#     an empty string disables linking
#   - databases - databases for which aligned files are copied
sub rsyncBuildFiles($$$$@) {
    my($rsyncPort, $buildServer, $buildRoot, $gbdbDir, @databases) = @_;

    # dereference symlinks. so can't use --archive
    # FIXME: add when have newer rsync: --log-format='%b %l %n%L'
    my $baseCmd = "rsync --copy-links --recursive --perms "
        . "--group --times " . getRSyncPortOpt($rsyncPort);
    if ($gbCommon::verbose) {
        $baseCmd .= " -v";
    }
    if ($checksum) {
        $baseCmd .= " -c";
    }
    makeDir("data");
    # make sure etc exists as a directory; otherwise rsync would write
    # ignore.idx to a plain file named etc
    makeDir("etc");

    # get ignore.idx
    my $cmd = $baseCmd . " $buildServer:$buildRoot/etc/ignore.idx etc";
    runProg($cmd);

    # get processed
    $cmd = $baseCmd . " $buildServer:$buildRoot/data/processed data";
    runProg($cmd);

    # link processed files before rsyncing aligned.  This prevents
    # window where load started before fastas are in place.
    if (($gbdbDir ne "no") && ($gbdbDir ne "")) {
        if (checkIfGbdbLinkable($gbdbDir)) {
            linkProcessedFastas($gbdbDir);
        } else {
            # haven't implemented copying with local rsync
            gbError("can't link, not on same filesystem: data/processed and $gbdbDir/genbank/data/processed");
        }
    }

    # get aligned for each database; the include/exclude pair restricts the
    # transfer to the $db directories one level below each top-level
    # directory of data/aligned
    makeDir("data/aligned");
    foreach my $db (@databases) {
        $cmd = $baseCmd .
            " --include='/*/$db/' --exclude='/*/*'"
            . " $buildServer:$buildRoot/data/aligned/ data/aligned";
        runProg($cmd);
    }
}

# Copy the ccdsimport.time file from the build server to the same relative
# path under the current directory.  A failed rsync is only reported with
# prMsg; it is not treated as an error.
sub rsyncCcdsImportTimeFile($$$) {
    my($rsyncPort, $buildServer, $buildRoot) = @_;

    my $timeRelPath = "var/ccdsimport/ccdsimport.time";
    makeFileDir($timeRelPath);

    my $remoteSpec = "$buildServer:$buildRoot/$timeRelPath";
    my $portOpt = getRSyncPortOpt($rsyncPort);

    if (runProgNoAbort("rsync $portOpt $remoteSpec $timeRelPath") != 0) {
        prMsg("Note: can't rsync $remoteSpec");
    }
}

# Entry
# Parse options, then poll the build server's build.time file and copy the
# build files only if the build is newer than the last one copied or no copy
# has been recorded yet.

# default matches the documented -rsyncPort default
my $rsyncPort = "ssh";
my $gbdbDir = "/gbdb";
while (($#ARGV >= 0) && ($ARGV[0] =~ /^-.*/)) {
    my $opt = $ARGV[0];
    shift @ARGV;
    if ($opt =~ /^-verbose=/) {
        # honor the supplied value, like the other -opt=value options
        $gbCommon::verbose = parseOptEq($opt);
    } elsif ($opt eq "-verbose") {
        $gbCommon::verbose = 1;
    } elsif ($opt =~ /^-rsyncPort=/) {
        $rsyncPort = parseOptEq($opt);
    } elsif ($opt =~ /^-gbdb=/) {
        $gbdbDir = parseOptEq($opt);
    } elsif ($opt =~ /^-checksum$/) {
        $checksum = 1;
    } else {
        gbError("invalid option \"$opt\"");
    }
}
if ($#ARGV < 2) {
    gbError("wrong # args: gbCopyStep [options] buildServer buildRoot db1 [db2 ...]");
}
my($buildServer, $buildRoot, @databases) = @ARGV;

# Retrieve and check the build.time file before opening the log file.
# This allows for frequent polling without unnecessary noise.
my $buildTimeTmp = getBuildTimeFile($rsyncPort, $buildServer, $buildRoot);
my $lastBuildTime = "var/copy/build.time";
my $copyTime = "var/copy/copy.time";

if (defined($buildTimeTmp)
    && (checkBuildTime($lastBuildTime, $buildTimeTmp) || (!-e $copyTime))) {

    # build time has changed
    beginTask("copy", "copy");

    rsyncBuildFiles($rsyncPort, $buildServer, $buildRoot, $gbdbDir, @databases);

    # install time files.
    makeTimeFile($copyTime);
    renameFile($buildTimeTmp, $lastBuildTime);

    # ccds time file; it will not be rsynced if a genbank build didn't happen,
    # but let that slide to avoid adding a more complex script
    rsyncCcdsImportTimeFile($rsyncPort, $buildServer, $buildRoot);

    endTask();
} elsif (defined($buildTimeTmp)) {
    # not new build; discard the temporary copy of build.time.  When the
    # rsync failed there is no temporary file, and calling unlink on undef
    # would only produce an uninitialized-value warning.
    unlink($buildTimeTmp);
}
