#!/usr/bin/perl
#usage: script name [nreps] [sample_size] [seed]
# name        - target identifier; selects the score, trajectory and sequence
#               input files and the output sub-directory
# nreps       - number of random resampling replicates (default 1)
# sample_size - number of decoys drawn per replicate (default 10000)
# seed        - seed for the random number generator, also embedded in the
#               output file names (default: current time)
my($name,$nreps,$sample_size,$seed)=@ARGV;
die "usage: $0 name [nreps] [sample_size] [seed]\n" unless defined($name);

if(!defined($nreps)){$nreps=1};
if(!defined($seed)){$seed=time()};
if(!defined($sample_size)){$sample_size=10000};
#seed the generator so that the seed recorded in the output file names
#really does reproduce the random maps
srand($seed);

my $base_dir="/media/storage/wcgrid_structures/spicker_files";
my $bin_dir="/media/data/home/lhhung/bin";
my $score_dir="/media/storage/complete_zhang_tmscores";
my $output_dir="/media/storage/randomized_scores";
my $sequence_dir="/media/storage/zhang_sequences/";

#per-decoy data, all indexed by the decoy's position in the score file:
#ranks by TM-score and RMSD, the raw scores, and the coordinate records
my (@tranks,@rranks,@coords,@tms,@rms);
get_ranks("$score_dir/$name.tmscores",\@tranks,\@tms,\@rranks,\@rms);
my $nca=read_traj("/media/storage/complete_zhang_matrices/$name.rmsd.matrix.tra2",\@coords);
unless (-e "$output_dir/$name"){
 mkdir("$output_dir/$name") || die "can't create $output_dir/$name: $!\n";
}

my $nstructs_all=$#tranks+1;
#a sample is drawn without replacement, so it cannot be larger than the pool
if($sample_size>$nstructs_all){
 warn "sample size $sample_size reduced to the $nstructs_all available decoys\n";
 $sample_size=$nstructs_all;
}

#spicker reads and writes fixed file names in the current directory, so
#work in a private directory named after this process
my $work_dir="temp$$";
mkdir($work_dir) || die "can't create $work_dir: $!\n";
chdir($work_dir) || die "can't change to $work_dir: $!\n";

system("cp","$sequence_dir/$name.zhang.seq","seq.dat")==0 || die "can't copy $sequence_dir/$name.zhang.seq\n";
open (my $rmsinp_fh,">","rmsinp") || die "can't write rmsinp: $!\n";
printf $rmsinp_fh "1 %d\n%d",$nca,$nca;
close($rmsinp_fh) || die "can't write rmsinp: $!\n";

#do nreps
#each replicate draws a random subset of the decoys, writes the subset (map)
#and its coordinates (tra) under $output_dir/$name, runs spicker on it in the
#current directory and writes one line of score statistics for every model
#reported by read_spicker_stats
foreach my $i (0..$nreps-1){
 my $prefix="$output_dir/$name/$name.$seed.$i";
 #find random map so that myrank[i]=rrank[map[i]], myid[i]=ids[rank[i]];
 my @map= find_n_random_numbers($sample_size,$nstructs_all);
 my (@my_rranks)=rerank(\@map,\@rranks);
 my (@my_tranks)=rerank(\@map,\@tranks);
 my ($rmean,$rvar)=mean_var(\@map,\@rms);
 my ($tmean,$tvar)=mean_var(\@map,\@tms);
 #a zero (or, through rounding, slightly negative) variance would make the
 #z-score computation die after the expensive spicker run; report 0 instead
 my $rsd=($rvar>0)?sqrt($rvar):0;
 my $tsd=($tvar>0)?sqrt($tvar):0;
 #output mapfile and coords file
 open (my $map_fh,">","$prefix.map") || die "can't write $prefix.map: $!\n";
 foreach my $m (@map){
  print $map_fh "$m\n";
 }
 close($map_fh) || die "can't write $prefix.map: $!\n";
 open (my $tra_fh,">","$prefix.tra") || die "can't write $prefix.tra: $!\n";
 foreach my $m (0..$#map){
  #start all spicker models at 1 instead of zero - might not matter but just in case
  my $mm=$map[$m];
  my $m1=$m+1;
  print $tra_fh "$nca 0 $m1 $m1\n$coords[$mm]\n";
 }
 close($tra_fh) || die "can't write $prefix.tra: $!\n";
 #set up the  input file
 open (my $in_fh,">","tra.in") || die "can't write tra.in: $!\n";
 printf $in_fh "1 -1 -1\n%s","$prefix.tra";
 close($in_fh) || die "can't write tra.in: $!\n";
 #drop the result of a previous replicate so that a failed run cannot be
 #mistaken for a fresh one
 unlink("rst.dat");
 system("$bin_dir/spicker")==0 || warn "spicker exited with status $?\n";

 my @results=read_spicker_stats("rst.dat");
 open (my $res_fh,">","results") || die "can't write results: $!\n";
 foreach my $r1 (@results){
  #a result line without the expected field gives no model number
  next unless defined($r1);
  #start all spicker models at 1 instead of zero - might not matter but just in case
  my $r=$map[$r1-1];
  printf $res_fh "%5d %8.3f %8.3f %5d %5d %5d %5d %8.5f %8.5f %8.5f %8.5f\n",$r,$tms[$r],$rms[$r],$tranks[$r],$rranks[$r],$my_tranks[$r],$my_rranks[$r],$my_tranks[$r]/$sample_size,$my_rranks[$r]/$sample_size,
                                                                         ($tsd?($tmean-$tms[$r])/$tsd:0),($rsd?($rms[$r]-$rmean)/$rsd:0);
 }
 close($res_fh) || die "can't write results: $!\n";
 system("cp","rst.dat","$prefix.spicker.dat")==0 || die "can't copy rst.dat to $prefix.spicker.dat\n";
 system("cp","results","$prefix.spicker.res")==0 || die "can't copy results to $prefix.spicker.res\n";
 #no exit here: leaving after the first pass would ignore $nreps
}



#end of the script proper; the program always stops here
exit;

#NOTE(review): everything from here down to the first sub definition is
#unreachable because of the exit above. It reads @test, @bin_files,
#$nthreads and $thread, none of which is assigned anywhere in the part of
#the file visible here - presumably leftovers of an earlier driver; confirm
#before reviving or deleting.

#debugging leftover: prints the first 100 entries of @test
foreach my $i(0..99){
 print "$test[$i]\n";
}
exit;
#earlier driver: for every file in @bin_files handled by this thread, derive
#the target name, prepare seq.dat, rmsinp and tra.in in $base_dir/$name and
#run spicker there
foreach my $b (0..$#bin_files){
 #files are dealt out round-robin over $nthreads; with $nthreads unset or 0
 #every file is processed
 if(!$nthreads || ($b % $nthreads == $thread)){
  my $bin_file=$bin_files[$b];
  
  my $name;
  #the name is the part of the path between "s/" and ".rmsd"
  if($bin_file =~/s\/([0-9\_a-zA-Z]+)\.rmsd/){
   $name=$1;
  }
  unless (-e "$base_dir/$name"){mkdir("$base_dir/$name")}
  chdir("$base_dir/$name");
  #swap the last three characters of the file name for "list"
  my $list_file=substr($bin_file,0,-3)."list";
  print stderr "working  on $list_file\n";
  #this exit stops after the first selected file, so the rest of the loop
  #body never ran even when this code was live
  exit;
  my @seq;
  my $nca=get_sequence($list_file,\@seq);
  #seq.dat: one "index residue" line per CA atom
  open (OUT,">seq.dat") || die;
  foreach my $i(1..$nca){
   printf OUT "%5d %5s\n",$i,$seq[$i];
  }
  close OUT;
  open (OUT,">rmsinp") || die;
  printf OUT "1 %d\n%d",$nca,$nca;
  close OUT;
  #tra.in names the trajectory file: the list file name with its last four
  #characters replaced by "tra2"
  open (OUT,">tra.in") || die;
  printf OUT "1 -1 -1\n%s",substr($list_file,0,-4)."tra2";
  close OUT; 
  system("../spicker");
 }
 #convert_list_to_spicker($list_file);
}
#create spicker traj file

#reads a score file and ranks every decoy by TM-score and by RMSD
# $file   - text file, one decoy per line: "<decoy> <tmscore> <rmsd>"
#           separated by white space
# $tranks - array ref, receives the 0-based rank by TM-score (highest first,
#           equal TM-scores ordered by lower RMSD)
# $gtms   - array ref, receives the TM-scores in file order
# $rranks - array ref, receives the 0-based rank by RMSD (lowest first,
#           equal RMSDs ordered by higher TM-score)
# $grms   - array ref, receives the RMSDs in file order
#decoys that are equal in both scores share the mean of the positions they
#occupy, so ranks may be fractional
#dies if the file cannot be opened
sub get_ranks{
 my($file,$tranks,$gtms,$rranks,$grms)=@_;
 #collect into private arrays so that a second call starts from scratch
 my (@tm,@rm);
 open (my $fh,"<",$file) || die "can't open $file\n";
 while (defined(my $line=<$fh>)){
  my ($decoy,$tm,$rm)=split(' ',$line);
  push(@tm,$tm);
  push(@rm,$rm);
 }
 close($fh);
 my (@tsorted)=sort{$tm[$b]<=>$tm[$a] || $rm[$a]<=>$rm[$b]}(0..$#tm);
 my (@rsorted)=sort{$rm[$a]<=>$rm[$b] || $tm[$b]<=>$tm[$a]}(0..$#tm);
 #convert to ranks - equal ranks
 _assign_tied_ranks(\@tsorted,\@tm,\@rm,$tranks);
 _assign_tied_ranks(\@rsorted,\@tm,\@rm,$rranks);
 @{$gtms}=@tm;
 @{$grms}=@rm;
}

#walks an ordering of decoy indices and stores each decoy's 0-based position
#in $ranks; a run of decoys equal in both scores gets the mean position
sub _assign_tied_ranks{
 my($order,$tm,$rm,$ranks)=@_;
 my $last=$#{$order};
 my $i=0;
 while($i<=$last){
  #extend $n to the last member of the run of ties that starts at $i
  my $n=$i;
  while($n<$last
        && $tm->[$order->[$n]] == $tm->[$order->[$n+1]]
        && $rm->[$order->[$n]] == $rm->[$order->[$n+1]]){
   $n++;
  }
  #mean of the positions $i..$n; equals $i when there is no tie
  my $rank=($i+$n)/2;
  foreach my $k($i..$n){
   $ranks->[$order->[$k]]=$rank;
  }
  $i=$n+1;
 }
}

#reads a trajectory file made of consecutive frames, each one header line
#followed by nca coordinate lines
# $file   - trajectory file; the first field of its first line is taken as
#           nca for every frame
# $coords - array ref, receives one string per frame (index starting at 0)
#           holding that frame's nca coordinate lines joined by newlines
#prints a one-line summary to stdout and returns nca
#dies if the file cannot be opened or its first line gives no usable count
sub read_traj{
 my($file,$coords)=@_;
 #started some of these at 1 instead of 0
 #returns it with the right numbering starting at 0
 #read the file directly rather than through a shell "cat", so odd
 #characters in the name are harmless and a missing file is reported
 open (my $fh,"<",$file) || die "can't open $file\n";
 my $content=do{local $/; <$fh>};
 close($fh);
 $content="" unless defined($content);
 my @lines=split(/\n/,$content);
 my ($nca)=@lines ? split(' ',$lines[0]) : ();
 die "can't read the atom count from $file\n" unless defined($nca) && $nca=~/^\d+$/ && $nca>0;
 #only complete frames are counted
 my $ndecoys=int(@lines/($nca+1));
 printf "%d decoys %d lines %d read in\n",$ndecoys,$ndecoys*($nca+1),$#lines+1;
 foreach my $i(0..$ndecoys-1){
  #skip the frame header and keep the nca lines behind it
  $coords->[$i]=join("\n",@lines[$i*($nca+1)+1..$i*($nca+1)+$nca]);
 }
 return($nca); 
}

#draws n distinct random integers from 0..range-1
# $n     - how many numbers to draw; a request for more than range numbers
#          is reduced to range, since the draw is without replacement
# $range - size of the pool
#returns the drawn numbers as a list (empty when n or range is not positive)
#uses rand(), so the result follows the current srand() seed
sub find_n_random_numbers{
 my ($n,$range)=@_;
 #creates an array of n random numbers from range of 0 to range-1
 $n=$range if($n>$range);
 return() if($n<=0);
 my(@set)=(0..$range-1);
 #partial shuffle: step k moves a random not-yet-chosen element to the end
 #of the unchosen part, so the last n slots end up holding the sample
 foreach my $k(0..$n-1){
  my $last=$range-$k-1;
  my $i= int rand($last+1);
  @set[$i,$last]=@set[$last,$i];
 }
 return(@set[-$n..-1]);
}

#builds a spicker trajectory file from a list of pdb files
# $file - list file; the first white-space separated token of every line is
#         kept when it contains "pdb"
#writes the output of get_CAs for each kept file, numbered from 1 in list
#order, to a file named like the list file with its last four characters
#replaced by "tra"
#dies if the list or the output file cannot be opened
sub convert_list_to_spicker{
 my ($file)=@_;
 my @names;
 open (my $list_fh,"<",$file) || die "can't open $file\n";
 while (defined(my $line=<$list_fh>)){
  my ($temp)=split(' ',$line);
  #a blank line has no first token
  if (defined($temp) && $temp =~ /pdb/){ push(@names,$temp)}
 }
 close($list_fh);
 my $out_file=substr($file,0,-4)."tra";
 open(my $out_fh,">",$out_file) || die "can't write $out_file\n";
 foreach my $i (0..$#names){
  foreach my $line(get_CAs($names[$i],$i+1)){
   print $out_fh "$line\n";
  }
 }
 close($out_fh) || die "can't write $out_file\n";
}
#reads the residue names of the first structure named in a list file
# $list - list file; the first token of its first line is the pdb file
# $seq  - array ref, receives the residue names indexed by the residue
#         number found in the pdb file (entries for absent numbers stay undef)
#returns the number of CA atom records found
#dies if the list names no file or either file cannot be opened
sub get_sequence{
 my ($list,$seq)=@_;
 my $pdb;
 open (my $list_fh,"<",$list) || die "can't open $list\n";
 if (defined(my $line=<$list_fh>)){
  ($pdb)=split(' ',$line);
 }
 close($list_fh);
 die "no pdb file named in $list\n" unless defined($pdb);
 #counts for CAs and any number of dupes
 my (@res_ids);
 my $nca=0;
 open (my $pdb_fh,"<",$pdb) || die "can't open $pdb\n";
 #the first line is consumed without being looked at as an atom record;
 #get_CAs parses an energy from the same line
 my $header=<$pdb_fh>;
 while (defined(my $line=<$pdb_fh>)){
  if (substr($line,0,4) eq "ATOM" && substr($line,13,2) eq "CA"){ 
   #fixed columns: residue number at offset 22, residue name at offset 17
   my $seqnum=sprintf "%d",substr($line,22,4);
   $res_ids[$seqnum]=substr($line,17,3);
   $nca++;
  }
 }
 close($pdb_fh);
 @{$seq}=@res_ids;
 return($nca);
}


#extracts the CA coordinates of one pdb file as a spicker trajectory frame
# $file - pdb file; the number following the first "=" on its first line is
#         used as the energy (0 when there is none)
# $n    - model number written twice into the frame header
#returns a list: the header "nca energy n n" followed by the 24-character
#coordinate field of every CA atom record after the first line
#dies if the file cannot be opened
sub get_CAs{
 my ($file,$n)=@_;
 #counts for CAs and any number of dupes
 my @out;
 my $nca=0;
 open (my $fh,"<",$file) || die "can't open $file\n";
 my $e=0;
 my $header=<$fh>;
 if(defined($header) && $header=~/\=([\-0-9\.]+)/){
  $e=$1;
 }
 while (defined(my $line=<$fh>)){
  if (substr($line,0,4) eq "ATOM" && substr($line,13,2) eq "CA"){
   $nca++;
   #fixed columns: x, y and z occupy 24 characters from offset 30
   push(@out,substr($line,30,24));
  }
 }
 close($fh);
 unshift(@out,"$nca $e $n $n");
 return(@out);
}
#counts the atom records of a pdb file and how many of them are duplicates
# $file - pdb file
#returns (number of ATOM records of any atom type, number of duplicates);
#both are 0 for a file without such records
#a record is a duplicate when its residue number plus atom name was seen
#before or, failing that, when its coordinate field was seen before
#dies if the file cannot be opened
sub count_atoms{
 my ($file)=@_;
 my ($nca,$ndupes)=(0,0);
 my (%CA_seen,%coords_seen);
 open (my $fh,"<",$file) || die "can't open $file\n";
 while (defined(my $line=<$fh>)){
  if (substr($line,0,4) eq "ATOM"){
   $nca++;
   my $resnumc=substr($line,22,6).substr($line,12,4);
   my $coords_str=substr($line,30,24);
   #the coordinate table is only consulted (and updated) when the residue
   #and atom name are new
   if($CA_seen{$resnumc}++ || $coords_seen{$coords_str}++ ){
    $ndupes++;
   }
  }
 }
 close($fh);
 return($nca,$ndupes);
}
#reads the per-cluster summary from a spicker result file
# $file - result file (the caller passes rst.dat)
#looks for the first line starting with " B-" and returns the 8th
#white-space separated field of each of the up to five lines after it
#(undef for a line with fewer fields); the caller treats these values as
#model numbers counted from 1
#returns an empty list when no such line exists
#dies if the file cannot be opened
sub read_spicker_stats{
 my ($file)=@_;
 my @out;
 open (my $fh,"<",$file) || die "can't open $file\n";
 while (defined(my $line=<$fh>)){
  next unless substr($line,0,3) eq " B-";
  foreach my $i (0..4){
   my $row=<$fh>;
   #the file may end before five lines have been read
   last unless defined($row);
   my(@rec)=split(' ',$row);
   push(@out,$rec[7]);
  }
  #only the first table is used
  last;
 }
 close($fh);
 return(@out);
}
#ranks the sampled decoys among themselves
# $map  - array ref of sampled decoy indices
# $rank - array ref of rank values indexed by decoy index
#returns an array indexed by decoy index (unset for decoys outside the
#sample) holding the decoy's 0-based position when the sample is ordered by
#ascending rank value; decoys with equal rank values all get the lowest
#position of their group
#NOTE(review): get_ranks assigns tied decoys a computed shared rank instead
#of the lowest position - confirm which rule is wanted here
sub rerank{
 my($map,$rank)=@_;
 my (@rerank);
 my (@sorted)=sort{$rank->[$map->[$a]]<=>$rank->[$map->[$b]]}(0..$#{$map});
 my $i=0;
 #<= so that the last element of the ordering is ranked as well
 while($i<=$#sorted){
  #extend $n to the last member of the run of ties that starts at $i
  my $n=$i;
  while ($n < $#sorted && $rank->[$map->[$sorted[$n]]] == $rank->[$map->[$sorted[$n+1]]]){
   $n++;
  }
  foreach my $k($i..$n){
   $rerank[$map->[$sorted[$k]]]=$i;
  }
  $i=$n+1;
 }
 return(@rerank);
}
#mean and variance of the scores selected by a map
# $map   - array ref of indices into $score
# $score - array ref of numeric scores
#returns (mean, variance) where the variance is the mean of the squares
#minus the square of the mean
sub mean_var{
 my($map,$score)=@_;
 my $total=0;
 my $total_sq=0;
 for my $idx (@$map){
  my $value=$score->[$idx];
  $total+=$value;
  $total_sq+=$value*$value;
 }
 my $count=scalar(@$map);
 my $mean=$total/$count;
 return($mean,$total_sq/$count-$mean*$mean);
}


#recovers the model numbers from a trajectory file
# $file - trajectory file
#returns the fourth field of every line that consists of exactly four
#white-space separated fields, in file order
#dies if the file cannot be opened
sub get_map_from_tra{
 my ($file)=@_;
 my @mymap;
 open (my $fh,"<",$file) || die "can't open $file\n";
 while (defined(my $line=<$fh>)){
  my(@rec)=split(' ',$line);
  #lines with any other number of fields are skipped
  if(@rec ==4){
   push(@mymap,$rec[3]);
  }
 }
 close($fh);
 return(@mymap);
}
