#!/usr/bin/env perl

##Author: Yontao Lu
##Date Jan, 31 03
##Des: This script will create stsMapRat.bed file. It will read stsInfoRat.bed 
##and stsMarkers_pos.rdb file and then output the Map.bed file.
##The map.bed contains marker aligning postion in chr and different map location.

# $Id: createStsBed,v 1.3 2006/09/18 22:38:05 hiram Exp $

use strict;
use warnings;

use Text::ParseWords;

if(@ARGV < 3)
{
    print STDERR "Usage: createStsBed infobed markerfile threshold.
Working for both rat and Mouse.\n";
    exit(1);
}

my $name = shift(@ARGV);
my $pos = shift(@ARGV);
my $threshold = shift(@ARGV);

open(IF1, "<$name") || die "Can not open info_bed $name: $!"; #Using the mgi.newconvert.tx

my %ids;
my %names;
my %RHCHR;
my %RHPOS;
my %RHLod;
my %FHHChr;
my %FHHPos;
my %SHRChr;
my %SHRPos;

# Read in info from info file
print STDERR "First Reading $name\n";
while (my $line = <IF1>) 
{
  chomp($line);
  my @fields = split(/\t/,$line);
  my $id = $fields[0];
  
  $ids{$id} = $id;
  $names{$id} = $fields[1];

  $RHCHR{$id} = $fields[19];
  $RHPOS{$id} = $fields[20];
  $RHLod{$id} = $fields[21];

  $FHHChr{$id} = $fields[13];
  $FHHPos{$id} = $fields[14];
  
  $SHRChr{$id} = $fields[16];
  $SHRPos{$id} = $fields[17];
}


close(IF1);

open(IF2, "<$pos") || die "can not open $pos: $!"; #get stsMarkerPos.rdb
open(OF, ">stsMap.error") or die "Can not write to stsMap.error: $!";
open(OF2, ">$$.ttemp") or die "Can not write to $$.ttemp: $!";

#get rid of the header of rdb
for(my $i = 0; $i<2; $i++)
{
    my $line =<IF2>;
}

my $score = 1000;

print STDERR "Second Reading $pos\n";
while(my $line = <IF2>)
{#read rdb and add markerId and name to each line for outputing.
    chomp($line);
    my $chr = "xxChr";
    my $start = "xxStart";
    my $end = "xxEnd";
    my $id = "xxId";
    my $times = "xxTimes";
    my $acc = "xxAcc";
    my @rest;
    $rest[0] = "xxRest0";
    ($chr, $start, $end, $id, $times, $acc, @rest) = split("\t", $line);
    
    $score = 1000;
    if(exists $ids{$id})
    {
	die "not defined chr" if ! defined($chr);
	die "not defined start" if ! defined($start);
	die "not defined end" if ! defined($end);
	die "not defined id" if ! defined($id);
	die "not defined times" if ! defined($times);
	die "not defined acc" if ! defined($acc);
	$names{$id} = "" if ! exists($names{$id});
	$RHCHR{$id} = "" if ! exists($RHCHR{$id});
	$RHPOS{$id} = "" if ! exists($RHPOS{$id});
	$RHLod{$id} = "" if ! exists($RHLod{$id});
	$FHHChr{$id} = "" if ! exists($FHHChr{$id});
	$FHHPos{$id} = "" if ! exists($FHHPos{$id});
	$SHRChr{$id} = "" if ! exists($SHRChr{$id});
	$SHRPos{$id} = "" if ! exists($SHRPos{$id});
	$names{$id} = "" if ! defined($names{$id});
	$RHCHR{$id} = "" if ! defined($RHCHR{$id});
	$RHPOS{$id} = "" if ! defined($RHPOS{$id});
	$RHLod{$id} = "" if ! defined($RHLod{$id});
	$FHHChr{$id} = "" if ! defined($FHHChr{$id});
	$FHHPos{$id} = "" if ! defined($FHHPos{$id});
	$SHRChr{$id} = "" if ! defined($SHRChr{$id});
	$SHRPos{$id} = "" if ! defined($SHRPos{$id});
	print OF2 "$chr\t$start\t$end\t$names{$id}\t$score\t$id\t$acc\t@rest\t$RHCHR{$id}\t$RHPOS{$id}\t$RHLod{$id}\t$FHHChr{$id}\t$FHHPos{$id}\t$SHRChr{$id}\t$SHRPos{$id}\t$times\n";
    }
    else
    {
	print OF "Can not find $id\n";
	
    }
    
}

close(OF2);
close(OF);

# sort the file so all items with same markerId will together.
#	Help to count hits.

`sort -k 6  -g $$.ttemp > $$.tttemp`;
`rm $$.ttemp`; 

open(IF2, "<$$.tttemp") or die "Can not read from $$.tttemp: $!";

print STDERR "Third Reading $$.tttemp\n";

#count hits for each marker and then cal score for it. Get the those passing the threshold to bed file. 
my @lines;
my $line = <IF2>;
chomp($line);
my @items = split(/\t/, $line);
$lines[0] = $line;
my $preId = $items[5];
my $time = 1;
while($line = <IF2>)
{
    chomp($line);
    my @item = split(/\t/, $line);
    if($item[5] == $preId)
    {
	$lines[$time] = $line;
	$time++;
    }
    else
    {
	if( ($time + $item[-1])== 2)
	{
	    my @eles = split(/\t/,$lines[0]);
	    $eles[4] = $score;
	    my @temp = @eles[0..14];
	    my $newline = join("\t", @temp);
	    print "$newline\n";
	    $preId = $item[5];
	    $lines[0] = $line;
	
	}
	else
	{
	    my $newScore = (int(1000 - (($time + $item[-1]) - 2) * 250 > 0) ? int(1000 - ($time - 1) * 250) : 0);
	    if($newScore >= $threshold)
	    {
		for(my $i = 0; $i < $time; $i++)
		{
		    my @eles = split(/\t/,$lines[$i]);
	            $eles[4] = $newScore;
		    my @temp = @eles[0..14];
		    my $newline = join("\t", @temp);
		    print "$newline\n";
		}
	    }
	    $lines[0] = $line;
	    $time = 1;
	    $preId = $item[5];
	}
    }
}

close(IF2);
`rm $$.tttemp`; 
