#!/usr/bin/awk -f 


# Start-up: build the usage text, honour a leading -h argument, and switch
# both input and output to tab-separated fields.
BEGIN {
    # Assembled by concatenation instead of backslash-newline continuation
    # inside a string literal, which the gawk manual flags as non-portable.
    # The resulting text is unchanged.
    usage = "alignStats [-h] db/update/*.alidx ...\n\n" \
            "Generate alignment statistics from a list of alidx files.  Needs db and\n" \
            "update directories in paths.";

    # Any first argument beginning with -h requests the usage message.
    if (ARGV[1] ~ /^-h/) {
        print usage;
        exit(0);
    }

    FS = "\t";
    OFS = "\t";

    # Number of distinct databases recorded in dbList so far.
    numDbs = 0;
}

# First record of every input file: derive the grouping keys from the file
# name, make sure their counters exist, then discard the record (a header).
#
# The path is read as .../<db>/<update>/<file>, and <file> is split on "."
# with the first piece taken as the type.  For type "est" the category is
# the third piece; for every other type it is the second.
FNR == 1 {
    nPath = split(FILENAME, pathParts, "/");
    db = pathParts[nPath-2];
    update = pathParts[nPath-1];   # parsed but not used by any visible rule

    # Split the file name into its own array.  The source string must not be
    # an element of the array that split() clears and refills.
    split(pathParts[nPath], nameParts, ".");
    type = nameParts[1];
    if (type == "est") {
        orgCat = nameParts[3];
    } else {
        orgCat = nameParts[2];
    }

    # Keys double as the first three output columns, hence the tabs.
    orgCatIdx = db "\t" type "\t" orgCat;
    typeIdx = db "\t" type "\ttotal";
    dbIdx = db "\ttotal\ttotal";

    # Create zeroed counters the first time each key is seen, so a file with
    # only a header still yields a row of zeros.
    if (!(orgCatIdx in cnts)) {
        cnts[orgCatIdx] = 0;
        alns[orgCatIdx] = 0;
    }
    if (!(typeIdx in cnts)) {
        cnts[typeIdx] = 0;
        alns[typeIdx] = 0;
    }
    if (!(dbIdx in cnts)) {
        cnts[dbIdx] = 0;
        alns[dbIdx] = 0;
        # Remember databases in first-seen order for the final report.
        dbList[++numDbs] = db;
    }
    next;  # skip header
}

# Data records (tab-separated columns: acc, version, numaligns).
# Each record counts once towards, and adds its third column to, the three
# running totals selected when the current file was opened: the organism
# category, the type, and the whole database.
{
    numAligns = $3;

    cnts[orgCatIdx] += 1;
    alns[orgCatIdx] += numAligns;

    cnts[typeIdx] += 1;
    alns[typeIdx] += numAligns;

    cnts[dbIdx] += 1;
    alns[dbIdx] += numAligns;
}

# Print one report row for the given db/type/orgCat combination: the three
# labels followed by the record count and the summed alignment count.
# Prints nothing when no counter exists for that combination.
#
# key is a local variable (declared as an extra parameter, the awk
# convention); callers pass only the first three arguments.
function prCnts(db, type, orgCat,    key) {
    key = db "\t" type "\t" orgCat;
    # "in" tests for existence without creating an empty element.
    if (key in cnts) {
        print db, type, orgCat, cnts[key], alns[key];
    }
}

# Final report: a header line, then for each database (in first-seen order)
# the mrna rows, the est rows, and the overall total.  Combinations that
# were never seen are skipped by prCnts.
END {
    # awk runs END rules even after exit is called in BEGIN.  Repeat the
    # help test here so a usage request is not followed by a report header.
    if (ARGV[1] ~ /^-h/) {
        exit(0);
    }

    print "#db","type","orgCat","num","aligns";
    for (idb = 1; idb <= numDbs; idb++) {
        db = dbList[idb];
        prCnts(db,"mrna","native");
        prCnts(db,"mrna","xeno");
        prCnts(db,"mrna","total");
        prCnts(db,"est","native");
        prCnts(db,"est","xeno");
        prCnts(db,"est","total");
        prCnts(db,"total","total");
    }
}
