#!/usr/bin/perl
use strict;
use warnings;

use File::Copy;

# Print the command-line usage summary to STDOUT and abort the script.
# Takes no arguments and never returns: it always finishes with die, whose
# message is a bare newline, so only an empty line reaches STDERR.
sub help {
    my @usage = (
        "Usage: perl chromopainterindivrename.pl namelist inputfileroot outputfileroot\n",
        "namelist is a file containing all individual names, one per row.\n",
        " (e.g. as produced by plink2chromopainter.pl).\n",
        "inputfileroot is the chromopainter file root, i.e. everything before.chunkcounts.out\n",
        "outputfileroot will be created with identical structure as inputfileroot,\n with the exception that IND1-INDN will be replaced by the names in namelist..\n",
    );
    # One print per string, exactly as many write calls as there are entries.
    print for @usage;
    die "\n";
}

## exactly three positional arguments are required; anything else shows the usage and stops
help() unless @ARGV == 3;

my ($namelist, $inputfileroot, $outputfileroot) = @ARGV;

## internal variables
# file endings whose header and row labels are rewritten as matrices
my @cpends = qw(
    regionsquaredchunkcounts.out
    regionchunkcounts.out
    chunklengths.out
    chunkcounts.out
    prop.out
    mutationprobs.out
);
# file endings that are copied to the output root unchanged
my @cpcopy = qw(samples.out copyprobsperlocus.out.gz);
# file endings in which "IND <num>" lines are replaced by a name
my @cpends2 = qw(EMprobs.out);
# individual names, filled from the name list file
my @names;
# number of expected input files that could not be processed
my $nummissing = 0;

## Read the names
# The first whitespace-delimited token of every non-blank line is taken as one
# individual's name; anything after it on the line is ignored.
# Three-argument open with a lexical handle, so a path containing characters
# such as '>' or '|' is never interpreted as an open mode.
open(my $namefh, '<', $namelist)
    or die "Cannot open name list $namelist: $!\n";
while (my $line = <$namefh>) {
    my ($name) = split ' ', $line;
    # A blank line yields no token. Skipping it keeps undef out of @names,
    # which would otherwise inflate $numnames and break the column-count
    # check applied to the matrix files.
    next unless defined $name;
    push @names, $name;
}
close $namefh;
my $numnames = scalar(@names);
# Without any names there is nothing to rename with; stop with a clear message.
die "No individual names found in $namelist\n" if $numnames == 0;

#################################
##### Matrices
# Tell whether a ChromoPainter file ending belongs to one of the two
# "region" outputs, which the matrix loop treats as having one extra column.
#   $_[0]  - file ending, e.g. "chunkcounts.out"
# Returns 1 for "regionsquaredchunkcounts.out" or "regionchunkcounts.out"
# (exact string match), 0 for anything else.
sub hasNumRegions {
    my ($suffix) = @_;
    for my $regional (qw(regionsquaredchunkcounts.out regionchunkcounts.out)) {
        return 1 if $suffix eq $regional;
    }
    return 0;
}
## loop over all input files
# Rewrite every matrix-style file: the header's column labels and each data
# row's leading label are replaced by the names from the name list, while all
# numeric fields are passed through. Lines starting with '#' are copied (with
# whitespace normalised to single spaces) and do not count as matrix lines.
for my $ending (@cpends) {
    my $infile  = "$inputfileroot.$ending";
    my $outfile = "$outputfileroot.$ending";
    print "PROCESSING $infile -> $outfile\n";

    ## account for the additional leading column in the region files
    my $other    = hasNumRegions($ending) ? 1 : 0;
    my $expected = $numnames + 1 + $other;

    ## an input that cannot be opened is counted and skipped, not fatal
    my $in;
    if (!open($in, '<', $infile)) {
        ++$nummissing;
        print "Skipping missing file $infile\n";
        next;
    }
    # Opening the output truncates it, so identical roots would destroy the
    # input before a single line had been read.
    die "Refusing to overwrite input file $infile: inputfileroot and outputfileroot must differ\n"
        if $infile eq $outfile;
    open(my $out, '>', $outfile) or die "Cannot write $outfile: $!\n";

    ## run through the file
    my $lineon = 0;    # counts non-comment lines only; 0 is the header
    while (my $line = <$in>) {
        my @tmp = split ' ', $line;
        # Guard on @tmp so a blank line reaches the column check below
        # instead of triggering an uninitialized-value warning here.
        if (@tmp && $tmp[0] =~ /^#/) {
            print {$out} "@tmp\n";
            next;    # assume that this line wasn't meant to be valid
        }
        if (@tmp != $expected) {
            my $tval = scalar(@tmp);
            die("Expected $expected columns on line $lineon, but received $tval!\n");
        }
        if ($lineon == 0) {
            # header line: keep the leading label (plus the extra region
            # column when present), then list every name
            print {$out} join(' ', @tmp[0 .. $other], @names), "\n";
        }
        else {
            # A row beyond the last name has no label to receive.
            die "More data rows than the $numnames names supplied in $infile\n"
                if $lineon > $numnames;
            print {$out} join(' ', $names[$lineon - 1], @tmp[1 .. $#tmp]), "\n";
        }
        ++$lineon;
    }
    close $in;
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Error while closing $outfile: $!\n";
}

#################################
##### Replace IND <num> with $names[num-1]
## loop over all input files
# Copy each EMprobs-style file line by line, replacing a trailing
# "IND <num>" (num between 1 and the number of names, no leading zeros) with
# $names[num-1]. All other text is written through unchanged.
for my $ending (@cpends2) {
    my $infile  = "$inputfileroot.$ending";
    my $outfile = "$outputfileroot.$ending";
    print "PROCESSING $infile -> $outfile (IND N -> \$names[N-1])\n";

    ## an input that cannot be opened is counted and skipped, not fatal
    my $in;
    if (!open($in, '<', $infile)) {
        ++$nummissing;
        print "Skipping missing file $infile\n";
        next;
    }
    # Opening the output truncates it, so identical roots would destroy the
    # input before a single line had been read.
    die "Refusing to overwrite input file $infile: inputfileroot and outputfileroot must differ\n"
        if $infile eq $outfile;
    open(my $out, '>', $outfile) or die "Cannot write $outfile: $!\n";

    ## run through the file
    while (my $line = <$in>) {
        # One anchored substitution with a direct index lookup replaces the
        # former loop that tried one regex per name on every line. It also
        # means a freshly inserted name can never be matched again by a
        # later pattern. Indices outside 1..$numnames are left as they are.
        $line =~ s{(IND\s([1-9][0-9]*))$}
                  { $2 <= $numnames ? $names[$2 - 1] : $1 }e;
        print {$out} $line;
    }
    close $in;
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Error while closing $outfile: $!\n";
}

#################################
##### Simple copy
## loop over all input files
# Copy the files that need no renaming from the input root to the output root.
# A source file that does not exist is counted and skipped. Any other copy
# failure (unreadable source, unwritable destination, full disk, ...) is fatal
# rather than being reported as a missing file.
for my $ending (@cpcopy) {
    my $infile  = "$inputfileroot.$ending";
    my $outfile = "$outputfileroot.$ending";
    print "PROCESSING $infile -> $outfile (COPY)\n";

    if (!-e $infile) {
        ++$nummissing;
        print "Skipping missing file $infile\n";
        next;
    }
    copy($infile, $outfile)
        or die "Cannot copy $infile to $outfile: $!\n";
}

# Summarise how many expected input files were skipped: more than four is
# flagged as a possible problem, one to four as normal, zero prints nothing.
if ($nummissing > 4) {
    print "Many output files ($nummissing) are missing; this could indicate a problem.\n";
}
elsif ($nummissing > 0) {
    print "Some files ($nummissing) were missing; this is normal if you used ChromoCombine output files.\n";
}
