#!/usr/bin/perl
## CONVERTS PHASE (CHROMOPAINTER) FORMAT TO BEAGLE FORMAT
use strict;
use Switch;
use POSIX;

sub help {
print("CONVERTS HAPLOID FINESTRUCTURE INPUTE TO DIPLOID\n");

print("usage:   perl hap2dip.pl <options> <inputfile> <outputfile>\n");


print("where:\n");
print("<inputfile>:    	Haploid FineSTRUCTURE input file (ChromoPainter output file, before or after calculating 'c')\n");
print("<outputfile>:   	Diploid FineSTRUCTURE input file (retaining any 'c' value)\n");

print("<options>:\n");
print("-n <namefile>	Use the first column in namefile as names for the individuals. Default: names are IND1-N\n");
print("-N <val> 	Number of header rows in the name file (default: 0)\n");
print("-v       	Verbose mode\n");
print("-vv       	Very verbose (debugging) mode\n");
print("-h       	This help text\n");
die "\n";
}

###############################
## ARGUMENT PROCESSING

my $verbose=0; ## verbose
my $vverbose=0; ## very verbose (debugging)
my $usenamefile=0; ## whether to read individual names from a file
my $namefileheaderlines=0; ## how many header lines to ignore of the name file

my @data; # the data in the matrix

my $infile="";
my $outfile="";
my $namefile="";

my $argon=0;
for (my $i = 0; $i < scalar(@ARGV); ++$i){
	if($ARGV[$i] eq "-v"){
		$verbose=1;
	}elsif($ARGV[$i] eq "-vv"){
		$verbose=1;
		$vverbose=1;
	}elsif($ARGV[$i] eq "-h"){
	    help();
	}elsif($ARGV[$i] eq "-n"){
		$usenamefile=1;
		$namefile= $ARGV[++$i];
	}elsif($ARGV[$i] eq "-N"){
		$namefileheaderlines= $ARGV[++$i];
	}else{
		switch($argon){
			case 0 {$infile="$ARGV[$i]";}
			case 1 {$outfile="$ARGV[$i]";}
			else {
				help();
			}
		}
		$argon++;
	}
}

if($outfile eq "" || $argon != 2) {help();}

if($verbose){
    print "Using input file $infile, output file $outfile\n";
    if($usenamefile) {print "Using name file $namefile with $namefileheaderlines header lines\n";
    }else{ print "Not using name file\n";}
}

####################################
## Define global variables
my @indnames; # names of the individuals

my $numhaps=0; # number of haplotypes observed
my $numinds=0; # number of haplotypes observed


####################################
## File IO

## Check we can read the input files
open INFILE, $infile or die $!;
if( $usenamefile ){
	open NAMEFILE, $namefile or die $!;
}

## Create output files
open OUTFILE, ">", $outfile or die $!;

####################################
## Functions we need
sub trim($){  # remove whitespace from beginning and end of the argument
	my $string = shift;
	$string =~ s/^\s+//;
	$string =~ s/\s+$//;
	return $string;
}

####################################
## Read the input file

## read the INFILE header
my $skip=1;
my @tmarr;
while ($skip) {
	my $tmp=trim(<INFILE>);
	if(substr($tmp,0,1) eq "#"){ # Cvalue line
	    print OUTFILE "$tmp\n";
	    if($verbose) {print "Read cvalue line $tmp\n";}
	}elsif(substr($tmp,0,9) == "Recipient"){
	    my @tarr=split(/ /,trim($tmp));
	    $numhaps = scalar(@tarr)-1;
	    $numinds = $numhaps/2.0;
	    if(int($numinds)!=$numinds){
		die "Expected an even number of haplotypes but received $numhaps...\n";
	    }
	    if($verbose) {print "Detected $numhaps haplotypes and $numinds individuals\n";}
	    $skip=0;
	}
}
# remaining lines are matrix rows
my $hapon=0;
while (my $tmp=<INFILE>) {
#    if($verbose){print "Reading haplotype $hapon\n";}
    my @tarr=split(/ /,trim($tmp));
    if(scalar(@tarr)!=$numhaps+1){
	my $tmp=scalar(@tarr);
	die "Expected $numhaps Haplotypes for haplotype row $hapon, but received $tmp\n";
    }
    shift @tarr;
    $data[$hapon++] = [ @tarr ];
}

close INFILE;

####################################
## make the individual names

if( $usenamefile ){
	for(my $i=0;$i < $namefileheaderlines;++$i){
		my $tmp=<NAMEFILE>;
	}
	while (my $tmp=<NAMEFILE>) {
		my @tarr=split(/ /,trim($tmp));
		push(@indnames, $tarr[0]);
	}
	close NAMEFILE;
	# check we have enough names
	my $tn=scalar(@indnames);
	if($tn < $numinds) {
		die "Error:Insufficient names in $namefile (found $tn, but have $numinds individuals)\n";
	}elsif($tn > $numinds) {
		print "WARNING: More names found in $namefile than expected (found $tn, but have $numinds individuals). These will be used but check they are correct.\n";
	}
}else{ # name them 1..N
	for (my $i = 1; $i <= $numinds; ++$i){
		push(@indnames, sprintf("ind%i",$i));
	}
}

print OUTFILE "Recipient";
for (my $ind = 0; $ind < $numinds; ++$ind){
    print OUTFILE " $indnames[$ind]";
}
print OUTFILE "\n";
for (my $ind1 = 0; $ind1 < $numinds; ++$ind1){
    print OUTFILE "$indnames[$ind1]";
    for (my $ind2 = 0; $ind2 < $numinds; ++$ind2){
	my $val=0;
	for (my $h1 = 0; $h1 < 2; ++$h1){
	    for (my $h2 = 0; $h2 < 2; ++$h2){
		if($vverbose){
		    my $f1=$ind1*2+$h1;
		    my $f2=$ind2*2+$h2;
		    my $f3=$data[$ind1*2+$h1][$ind2*2+$h2];
		    print "[$ind1,$ind2]: adding [$f1,$f2] = $f3\n";
		}
		$val += $data[$ind1*2+$h1][$ind2*2+$h2];
	    }
	}
	if($ind1==$ind2) {$val=0;}
	print OUTFILE " $val";
    }
    print OUTFILE "\n";
}

#for (my $snp = 0; $snp < $numsnps; ++$snp){
#	print "$snp $marker1[$snp]\n";
#	if($marker1[$snp] ne "?") { # exclude non varying SNPS
#		print BEAGLESNP "M $snplocs[$snp]";
#		for (my $hap = 0; $hap < $numhaps; ++$hap){
#			my $tval=snp2marker($snpvals[$hap][$snp]);
#			print BEAGLESNP " $tval";
#		}
#		print BEAGLESNP "\n";
#	}
#}
close OUTFILE;# close snp file
