#!/usr/bin/perl
#
# findUnusedReleases
#
# $Id: findUnusedReleases,v 1.2 2004/06/07 02:43:17 genbank Exp $
#
use strict;
use warnings;
use File::Basename;
use FindBin;
use lib "$FindBin::Bin/../lib";
use gbCommon;

# Usage text; appended to the error message when the command line is invalid.
my $usage = join("",
    "findUnusedReleases [-details]\n",
    "Compare releases with extFile dumps under: \n",
    "   var/dbload/*/extFile/\$date/*.extFile.txt\n",
    "to look for unused releases that can be removed\n");

# Table mapping a release name to a reference to a list of "host:db" strings
# for the databases whose extFile dumps reference that release.
my %releaseUsage;

# Table mapping a release name to a reference to a list of the gbdb file paths
# (taken from the extFile dumps) that belong to that release.
my %releaseGbdb;

# Table mapping a gbdb file path to a reference to a list of "host:db" strings
# for the databases whose extFile dumps reference that file.
my %gbdbUsage;

# Return the path of the newest extFile dump directory for $host.
#
# Candidates are the entries matching var/dbload/$host/extFile/*.*.* relative
# to the current directory.  The one that sorts last as a string is returned,
# so "newest" relies on the directories being named so that string order
# follows date order.  gbError() is called when there are no candidates.
sub getNewestGpExtFileDir($) {
    my($host) = @_;

    my @candidates = glob("var/dbload/$host/extFile/*.*.*");
    if (!@candidates) {
        gbError("no extFile dumps for $host");
    }
    my @ordered = sort { $a cmp $b } @candidates;
    return $ordered[-1];
}

# Add one gbExtFile path to the tables of releases in use.
#
# Arguments:
#   $host    - name of the host the path is being recorded for
#   $db      - name of the database on that host
#   $extFile - file path to record
#
# The release is taken to be the third path component from the end of
# $extFile (.../<release>/<dir>/<file>).  Three tables are updated:
#   %releaseUsage: release -> list of host:db
#   %releaseGbdb:  release -> list of file paths
#   %gbdbUsage:    file path -> list of host:db
# An entry is appended to an existing list only when inListRef() reports that
# it is not already there.
sub addHostDbReleases($$$) {
    my($host, $db, $extFile) = @_;
    my @parts = split(/\//, $extFile);
    my $rel = $parts[$#parts-2];  # third component from the end
    my $hostDb = $host . ":" . $db;

    # add release to host:db
    if (!defined($releaseUsage{$rel})) {
        $releaseUsage{$rel} = [$hostDb];
    } elsif (!inListRef($hostDb, $releaseUsage{$rel})) {
        push(@{$releaseUsage{$rel}}, $hostDb);
    }

    # add release to gbdb file
    if (!defined($releaseGbdb{$rel})) {
        $releaseGbdb{$rel} = [$extFile];
    } elsif (!inListRef($extFile, $releaseGbdb{$rel})) {
        push(@{$releaseGbdb{$rel}}, $extFile);
    }

    # add gbdb file to host:db.  %gbdbUsage is keyed by file path, so the
    # membership check must look at the same list that is pushed onto.
    if (!defined($gbdbUsage{$extFile})) {
        $gbdbUsage{$extFile} = [$hostDb];
    } elsif (!inListRef($hostDb, $gbdbUsage{$extFile})) {
        push(@{$gbdbUsage{$extFile}}, $hostDb);
    }
}

# Add every row of one gbExtFile dump to the tables of releases in use.
#
# Arguments:
#   $host        - name of the host the dump belongs to
#   $extFileDump - path of the dump file, for example
#                  var/dbload/hgw1/extFile/2004.05.28/fr1.extFile.txt
#
# The database name is the dump's file name without ".extFile.txt".  Each
# line is split on tabs and the second column is passed on as the file path.
sub addHostReleases($$) {
    my($host, $extFileDump) = @_;

    my $db = basename($extFileDump, ".extFile.txt");
    my @lines = split(/\n/, readFile($extFileDump));
    for my $line (@lines) {
        # second tab-separated column; undef when the line has fewer columns
        my $path = (split(/\t/, $line))[1];
        addHostDbReleases($host, $db, $path);
    }
}

# Load the newest gbExtFile dumps of every host into the tables of releases
# in use.
#
# Hosts are found by matching var/dbload/*/extFile relative to the current
# directory; the host name is the directory directly above "extFile".  For
# each host, every *.extFile.txt file in its newest dump directory is loaded.
sub addReleases() {
    my @extFileDirs = glob("var/dbload/*/extFile");
    for my $extFileDir (@extFileDirs) {
        my $host = basename(dirname($extFileDir));
        my $newestDir = getNewestGpExtFileDir($host);
        my @dumps = glob($newestDir . "/*.extFile.txt");
        for my $dump (@dumps) {
            addHostReleases($host, $dump);
        }
    }
}

# Print the usage information recorded for one release to stdout.
#
# Arguments:
#   $rel     - release name, a key of %releaseUsage
#   $details - when true, also list the files recorded for the release
#
# The first line is the release followed by its sorted host:db entries, tab
# separated.  With $details, one line per file follows, each starting with a
# tab, then the file path and the sorted host:db entries recorded for it.
sub printRelInfo($$) {
    my($rel, $details) = @_;

    my @relHostDbs = sort(@{$releaseUsage{$rel}});
    print "$rel\t" . join("\t", @relHostDbs) . "\n";

    if ($details) {
        my @relFiles = sort(@{$releaseGbdb{$rel}});
        for my $extFile (@relFiles) {
            my @fileHostDbs = sort(@{$gbdbUsage{$extFile}});
            print "\t$extFile\t" . join("\t", @fileHostDbs) . "\n";
        }
    }
}


# Entry point: parse leading options, build the usage tables, then print one
# report entry per release in sorted order.
my $details = 0;

# consume leading arguments that look like options
while (@ARGV && ($ARGV[0] =~ /^-.*/)) {
    my $opt = shift(@ARGV);
    if ($opt !~ /^-details$/) {
        gbError("invalid option \"$opt\"\n$usage");
    } else {
        $details = 1;
    }
}

# no positional arguments are accepted
gbError("wrong # args: $usage") if (@ARGV);

addReleases();
my @releases = sort(keys(%releaseUsage));
for my $rel (@releases) {
    printRelInfo($rel, $details);
}
