#!/usr/bin/perl -wT
$ENV{'PATH'} = '/usr/local/bin:/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};   # Make %ENV safer

######################################################################
# written by: Belinda Giardine
######################################################################
use strict;

#this converts from 1 set of coordinates to another using a PSL file

my $psl = shift @ARGV;
my $seq = shift @ARGV;
my $input = shift @ARGV;
my $pslLine;
if (@ARGV) { $pslLine = shift @ARGV; }
if (!$psl or !$input or !$seq) { 
   print "USAGE: convert_coors file.psl seqName_qName_in_file 999[-99,+99]\n";
   print "Where the 9's are replaced by the coding sequence position or genomic position\n";
   exit;
}
if ($input =~ /![0-9+-]/) {
   print "ERROR needs a integer with possibly a + or - into the intron to convert\n";
}
my $offset = '';
if ($input =~ /\d+([+*-]\d+)/) {
   $offset = $1;
   $input =~ s/[+*-]\d+//;
   if ($offset =~ /\*/) { $offset =~ s/\*/+/; }
}
if ($input < 0 && $offset ne '') {
   print "ERROR can't have offset outside of exon pos=$input offset=$offset\n";
}
my $fh;
my @f; #fields in psl file that correspond to origin seq
if (!$pslLine) {
   open($fh, '<', $psl) or die "ERROR couldn't open gene decription file, $psl, $!\n";
   while (<$fh>) {
      #format: bin,matches,misMatches,repMatches,nCount,qNumInsert,qBaseInsert,tNumInsert
      #tBaseInsert,strand  qName   qSize   qStart  qEnd    tName   tSize   tStart  tEnd    blockCount
      #blockSizes      qStarts tStarts
      chomp;
      if (!defined or $_ eq '') { next; }
      if (/\s*#/) { next; } #comment
      @f = split(/\s+/);
      if (scalar @f < 21) { die "ERROR bad psl file format $_\n"; }
      elsif (scalar @f == 22) { shift @f; } #remove bin, not in downloads
      if ($f[9] eq $seq) { last; }
   }
   close $fh or die "ERROR reading psl file, $psl $!\n";
}else {
   @f = split(/\s+/, $pslLine);   
   if (scalar @f < 21) { die "ERROR bad pslLine format $pslLine\n"; }
   elsif (scalar @f == 22) { shift @f; } #remove bin, not in downloads
}

if ($f[9] ne $seq) {
   print "ERROR couldn't find $seq in $psl have $f[9]\n";
   exit;   
}

my @gbStarts = split(/,/, $f[19]); #these are counting from end of - strands
my @gbLens = split(/,/, $f[18]);
my @chrStarts = split(/,/, $f[20]);
my $totBlocks = $f[17];
my $gbSt = $f[11]; #start of alignment in query sequence
my $chromPos;
my $chr = $f[13];

if ($f[8] =~ /^\+/) { #plus strand
   my $b;
   if ($input < 0) {
      $chromPos = $chrStarts[0] + $input;
   }else {
      for($b = 0; $b < $totBlocks; $b++) {
         if ($input <= $gbStarts[$b] + $gbLens[$b]) {
            if ($input < ($gbStarts[$b] + 1)) { #1 based comparison
               print "ERROR: Couldn't map $seq $input, it is in a gap\n";
               exit;
            }
            my $diff = $input - ($gbStarts[$b] + 1); #1 based difference
            $chromPos = $chrStarts[$b] + $diff;
            last;
         }
      }
      if ($b == $totBlocks && $input > $gbStarts[$b-1] + $gbLens[$b-1]) {
         my $pos = $gbStarts[$b-1] + $gbLens[$b-1];
         print "ERROR $input not in gene, end of gene at $pos\n";
         exit;
      }
   }
}else { #minus strand
   #chrom always +?, query can be -
   if ($input < 0) {
      $chromPos = $chrStarts[$totBlocks-1] + $gbLens[$totBlocks-1] - 1 + abs($input);
   }else {
      my $pos = $gbSt; #check for gap at beginning
      my $b;
      for($b = $totBlocks - 1; $b >= 0; $b--) {
         $pos += $gbLens[$b];
         #check for gap
         my $gap = 0;
         if ($b != $totBlocks -1 && $gbStarts[$b+1] != $gbStarts[$b] + $gbLens[$b]) {
            $gap = $gbStarts[$b+1] - ($gbStarts[$b] + $gbLens[$b]);
            $pos += $gap;
         }
         if ($input <= $pos) {
            if ($input <= $pos - $gbLens[$b]) {
               print "ERROR: Couldn't map $seq $input, it is in a gap\n";
               exit;
            }
            my $diff = $pos - $input;
            $chromPos = $chrStarts[$b] + $diff;
            #zero based start as chrStart is zero based
            last;
         }
      }
      if ($b < 0 && $input > $pos) {
         print "ERROR $input not in gene, end of gene at $pos\n";
         exit;
      }
   }
}

if (!defined $chromPos) { print "ERROR couldn't find chr position for $input $offset in $seq\n"; exit; }
$chromPos++; #switch to 1 based number
if ($offset) {
   if ($f[8] =~ /^\+/) { #plus strand
      $chromPos += $offset;
   }else {
      $chromPos -= $offset;
   }
}
print "$input $offset is mapped to $chr $chromPos $f[8]\n";

exit;
