#!/usr/bin/perl -w

###########################
#    MatchLocations.pl    #
#      Version 1.02       #
#    (c) Ed Egan 2009     # 
###########################

=head1 NAME

MatchLocations.pl v1.01

=head1 DESCRIPTION & AUTHOR

'Process patent inventor location files and match the locations against known GNS location names, recording longitude and latitude'

Written by Ed Egan <ed@edegan.com>

Copyright (C) 2009. Be nice.

=head1 SYNOPSIS

perl MatchPatentLocations.pl [-co=<str>] [-o=<file>]  

=head1 OPTIONS
    -co=<str>:      ISO9660 country code (e.g. GB)
    -o=<file>:      Outfile name (default=co.-Results.txt): Name of processed file that will be created
    -u=:            Output UNMATCHED results as well. If not present, only matched results are written
    -wf:            Write the fuzzy output files seperately
    -human:         Write a human match file with candidate matches for unmatched strings.
    -r:             Write a report file that is the same as the STDOUT report
    -h:             Display this semi-informative help

=cut

#Load Packages
use strict;
use Match::Common qw(CleanString);
use Match::GNS;
use Match::Patent;
use Match::Gram;
use Match::LCS;
use Match::PostalCodes;
use warnings;
use Getopt::Long;
use Data::Dumper;
use Pod::Usage;

# Read command-line arguments / Define global variables
my $opt_show_help=0;
my ($opt_co, $opt_outfile);
my $opt_sourcedir="Source";
my $opt_refdir="GNS";
my $opt_writefuzzy=0; my $opt_human=0; my $opt_report=0; my $opt_unmatched=0;
GetOptions(
      "co=s"    => \$opt_co,
      "o=s"     => \$opt_outfile,
      "u"       => \$opt_unmatched,
      "wf"      => \$opt_writefuzzy,
      "human"   => \$opt_human,
      "r"       => \$opt_report,
      "h"       => \$opt_show_help
) || die pod2usage(2);
if ($opt_show_help) {
   pod2usage(-exitstatus => 0, -verbose => 2);
}
if (!$opt_outfile) {$opt_outfile=$opt_co."-Results.txt";}
my @Letters=("A","P");
my $ReportFileName=$opt_co."-Report-".time().".txt";
if ($opt_report) {open (REPORT,">$ReportFileName") || die "Can not open the Report file to write".$!;} #autoflush REPORT 1;
#autoflush STDOUT 1;

#And go!
my $StartTime=time();
dprint ("#######################################\n");
dprint ("#           MatchLocations.pl         #\n");
dprint ("#                 v1.01               #\n");
dprint ("#          (c) Ed Egan, 2009          #\n");
dprint ("#######################################\n\n");
dprint ("Starting the program...\t\t[OK]\n");
dprint ("\tDate & Time:\t\t".scalar(localtime)."\n");

dprint ("Loading the source files...\t");
my $Location=Patent->new($opt_co,$opt_sourcedir);
dprint ("[Done]\n");
dprint ("\tMain Read Count:\t".$Location->GetCount("READ")."\n");
dprint ("\tRecord Count:\t\t".$Location->GetCount("DATA")."\n");
dprint ("\tException Read Count:\t".$Location->GetCount("EXPREAD")."\n");

dprint ("Loading the ref files...\t");
my $GNS=GNS->new($opt_co,$opt_refdir);
dprint ("[Done]\n");
dprint ("\tRead Count:\t\t".$GNS->GetCount("DATA")."\n");

dprint ("Clean & Parse source...\t\t");
$Location->CleanAndParse;
dprint ("[Done]\n");

dprint ("Build ref indices...\t\t");
$GNS->Index;
dprint ("[Done]\n");

dprint ("Exceptions List Matching...\t");
my $ExpSuccess=ExactMatchList("EXP_LIST");
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

dprint ("Well Formatted List Matching...\t");
my $CtyListSuccess=ExactMatchList("CTY_LIST");
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

dprint ("Token matching...\t\t");
my $TokenSuccess=TokenMatch("CTY_CLEAN","EXACTTOKEN");
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

dprint ("Ngram and LCS Setup...\t\t[OK]\n");
my ($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug)=("CTY_CLEAN","NGRAM_CTY_CLEAN_1-4-Special",1,4,0.8,1,0.8,1,0.84,0,10,0);
&PrintNgramLCSSettings($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("Ngram and LCS matching...\t");
my $NgramSuccess=GramMatch($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

dprint ("Ngram and LCS Setup...\t\t[OK]\n");
($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug)=("CTY_CLEAN","NGRAM_CTY_CLEAN_6-3-Hard",6,3,0.5,1,0.5,1,0.85,1,10,0);
&PrintNgramLCSSettings($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("Ngram and LCS matching...\t");
$NgramSuccess=GramMatch($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

dprint ("Ngram and LCS Setup...\t\t[OK]\n");
($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug)=("CTY_CLEAN","NGRAM_CTY_CLEAN_6-2-Medium",6,2,0.6,1.2,0.6,1.2,0.86,1,1,0);
&PrintNgramLCSSettings($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("Ngram and LCS matching...\t");
$NgramSuccess=GramMatch($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug);
dprint ("[Done]\n");
dprint ("\tMatches Made:\t\t".$Location->GetCount("MATCHES_LAST")."\n");
dprint ("\tTotal Matches:\t\t".$Location->GetCount("MATCHES_TOTAL")."\n");

my $HumanSet=undef;
if ($opt_human) {
   dprint ("Create the Human choices...\t");
   #($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug)
   my $Human1=GramMatch("CTY_CLEAN","HUMAN_CHOICE1",6,3,0.3,1.5,0.3,1.5,0.3,1,1,1);
   my $Human2=GramMatch("CTY_CLEAN","HUMAN_CHOICE2",1,2,0.3,1.5,0.3,1.5,0.3,0,5,1);
   $HumanSet=ReconcileForHumans($Human1,$Human2);
   dprint ("[Done]\n");
}

dprint ("Deduplicate and fix...\t\t");
my $DistanceCalcs=FixMatches();
dprint ("[Done]\n");
dprint ("\tDistance Lookups:\t".$DistanceCalcs."\n");

dprint ("Writing output file...\t\t");
my $LinesWritten=undef;
if (!$opt_unmatched) {$LinesWritten=WriteOutfile($opt_outfile,"MATCHED"); dprint ("[MATCHED ONLY]\n");}
else {$LinesWritten=WriteOutfile($opt_outfile); dprint ("[INCS UNMATCHED]\n");}
dprint ("\tOutput Filename:\t".$opt_outfile."\n");
dprint ("\tLines Written:\t\t".$LinesWritten."\n");

if ($opt_writefuzzy) {
   dprint ("Writing Fuzzy file...\t\t");
   my $FuzzySetName="NGRAM_CTY_CLEAN_1-4-Special";
   $LinesWritten=WriteOutfile($opt_co."-".$FuzzySetName.".txt",$FuzzySetName);
   dprint ("[Done]\n");
   dprint ("\tFuzzy Setname:\t\t".$FuzzySetName."\n");
   dprint ("\tLines Written:\t\t".$LinesWritten."\n");
   
   dprint ("Writing Fuzzy file...\t\t");
   $FuzzySetName="NGRAM_CTY_CLEAN_6-3-Hard";
   $LinesWritten=WriteOutfile($opt_co."-".$FuzzySetName.".txt",$FuzzySetName);
   dprint ("[Done]\n");
   dprint ("\tFuzzy Setname:\t\t".$FuzzySetName."\n");
   dprint ("\tLines Written:\t\t".$LinesWritten."\n");
   
   dprint ("Writing Fuzzy file...\t\t");
   $FuzzySetName="NGRAM_CTY_CLEAN_6-2-Medium";
   $LinesWritten=WriteOutfile($opt_co."-".$FuzzySetName.".txt",$FuzzySetName);
   dprint ("[Done]\n");
   dprint ("\tFuzzy Setname:\t\t".$FuzzySetName."\n");
   dprint ("\tLines Written:\t\t".$LinesWritten."\n");
}

if ($opt_human){
   dprint ("Writing Human choice file...\t");
   if ($HumanSet) {WriteHumanChoiceFile($HumanSet);}
   dprint ("[Done]\n");
}

my $TimeElapsed=time()-$StartTime;
dprint ("Script run completed...\t\t[OK]\n\n");
dprint ("#######################################\n");
dprint ("#            (C)2009 Ed Egan.         #\n");
dprint ("#       Please thank the author.      #\n");
dprint ("#    Run completed in: $TimeElapsed seconds.   #\n"); #$TimeElapsed
dprint ("#######################################\n");
if ($opt_report) {close (REPORT);}

sub dprint {
   my $Str=shift @_;
   print STDOUT $Str;
   if ($opt_report) {print REPORT $Str;}
}

sub PrintNgramLCSSettings {
	my ($ListType,$MarkType,$GramAlphabet,$NGramLength,$LeftLower,$LeftUpper,$RightLower,$RightUpper,$LCSThreshold,$FirstLetterBinds,$MinLengthThreshold,$Debug)=@_;
	dprint ("\tSettings:\n");
	dprint ("\t\tListType:\t".$ListType."\n");
	dprint ("\t\tMarkType:\t".$MarkType."\n");
	dprint ("\t\tGramAlphabet:\t".$GramAlphabet."\n");
	dprint ("\t\tNGramLength:\t".$NGramLength."\n");
	dprint ("\t\tLeftLower:\t".$LeftLower."\n");
	dprint ("\t\tLeftUpper:\t".$LeftUpper."\n");
	dprint ("\t\tRightLower:\t".$RightLower."\n");
	dprint ("\t\tRightUpper:\t".$RightUpper."\n");
	dprint ("\t\tLCSThreshold:\t".$LCSThreshold."\n");
	dprint ("\t\tFirstLetter:\t".$FirstLetterBinds."\n");
	dprint ("\t\tMinMaxLength:\t".$MinLengthThreshold."\n");
	dprint ("\t\tDebug:\t\t".$Debug."\n");
	return 1;
}

sub ExactMatchList {
   my $ListType=shift;
   my $UnMatched=$Location->GetUnMatched($ListType);
   my $Success=0;
   if (scalar(keys(%$UnMatched))>0) {
      my $Matched={};
      foreach my $Letter (@Letters) {
         my @Index=$GNS->GetIndexKeys($Letter);
         foreach my $CTY (keys (%$UnMatched)) {
			undef my @Matches;
            foreach my $Field (@{$UnMatched->{$CTY}}) {
               foreach my $Key (@Index) {
                  if (uc($Key) eq uc ($Field)) {push (@Matches,$Key);}
               }
            }
			if (@Matches) {$Matched->{$CTY}->{$Letter."LIST"}=\@Matches;}
         }
      }
      if (scalar(keys(%$Matched))>0) {
         $Matched=ChooseBestAandP($Matched);
         $Location->ReturnMatches($Matched,$ListType);
         $Success=1; 
      }
   }
   return $Success;
}

sub TokenMatch {
   my $ListType=shift;
   my $MarkType=shift;
   my $UnMatched=$Location->GetUnMatched($ListType);
   my $Success=0;
   my $MaxLength=5; #Set the max token length
   if (scalar(keys(%$UnMatched))>0) {
      my $Matched={};
      foreach my $Letter (@Letters) {
         my @Index=$GNS->GetIndexKeys($Letter);
         foreach my $CTY (keys (%$UnMatched)) {
            undef my @Matches;
            my @Tokens=split(" ",$UnMatched->{$CTY});
            my $StartLength=($MaxLength-1 <= $#Tokens) ? $MaxLength-1 : $#Tokens;
            for (my $i=$StartLength; $i>=0; $i--) {
               if (scalar(@Matches)>0) {next;}
               undef my @SliceStart;
               my $Exhausted=0; my $StartPos=$#Tokens-$i;
               until ($Exhausted) {
                  if ($StartPos<0) {$Exhausted=1;next;}
                  push (@SliceStart,$StartPos);
                  $StartPos--;
               }
               foreach my $StartPos (@SliceStart) {
                  my $TokenStr=join(" ",@Tokens[$StartPos..($StartPos+$i)]);
                  foreach my $Key (@Index) {
                     if (uc($Key) eq uc($TokenStr)) {push (@Matches,$Key);}
                  }
               }
            }
            if (@Matches) {$Matched->{$CTY}->{$Letter."LIST"}=\@Matches;}   
         }
      }
      if (scalar(keys(%$Matched))>0) {
         $Matched=ChooseBestAandP($Matched);
         $Location->ReturnMatches($Matched,$MarkType);
         $Success=1; 
      }
   }
   return $Success;
}

sub GramMatch {
   my $ListType=shift; 
   my $MarkType=shift;
   my $GramAlphabet=shift; #Alphanumerics, Alphas, spaced, etc... See GramMatch.pm code for numbers
   my $NGramLength=shift; #Use trigams, bigrams, etc
   my $LeftLower=shift; #The Lower threshold Pc of the Ref that appears in the Source
   my $LeftUpper=shift; #The Upper threshold Pc of the Ref that appears in the Source
   my $RightLower=shift; #The Lower threshold Pc of the Source that appears in the Ref
   my $RightUpper=shift; #The Upper threshold Pc of the Source that appears in the Ref
   my $LCSThreshold=shift; #The Pc of the Ref that is the LCS
   my $FirstLetterBinds=shift; #Must the First Letter Match?
   my $MaxLengthThreshold=shift; #What is the LCS Max Length Threshold?
   my $Debug=shift; #For Debug/Human Results
   my $Success=0;
   my $UnMatched=$Location->GetUnMatched($ListType);
   if (scalar(keys(%$UnMatched))>0) {
      my $LCS=LCS->new();
      my $BestLeftPc=0; my $BestRightPc=0; #For debug only - keep a running best...
      my $Gram=Gram->new($GramAlphabet,$NGramLength);
      my $FuzzyMatches={}; #For Debug/Human Results
      my $Matched={};
      foreach my $Letter (@Letters) {
         my @Index=$GNS->GetIndexKeys($Letter);
         my $ForwardIndex=$Gram->Index(\@Index,$Letter);
         my $ReverseIndex=$Gram->ReverseIndex(\@Index,$Letter); #for speed on long datasets use the reverse index - on short datasets this will slow things down though
         foreach my $CTY (keys (%$UnMatched)) {
            undef my @Matches; my $BestLeftScore=0; my $BestRightScore=0; my $BestCandidates={};
            my $SourceGrams=$Gram->GetNGrams($UnMatched->{$CTY});
            my $SourceTotal=$Gram->Total($SourceGrams);
            undef my @RefListWithDups;
            foreach my $SourceGram (keys(%$SourceGrams)) {
               my $ReturnList=$Gram->ReverseList($SourceGram,$Letter);
               if ($ReturnList) {push (@RefListWithDups,@$ReturnList);}
            }
            my $Cleaning={};
            foreach (@RefListWithDups) {$Cleaning->{$_}=1;}
            my @RefList=keys (%$Cleaning);
            if (scalar(@RefList)==0) {next;}
            my $RefGrams=$Gram->Lookup(\@RefList,$Letter);
            if (!@Matches) {
               foreach my $GramKey (keys (%$RefGrams)) {
                  if ($FirstLetterBinds) {
                     my $Source=uc($UnMatched->{$CTY}); my $Ref=uc($GramKey);
                     if (substr($Source,0,1) ne substr($Ref,0,1)) {next;}
                  }
                  my $LeftScore=$Gram->Score($RefGrams->{$GramKey},$SourceGrams); #Use the reference string as left - it is 'correct'
                  my $RightScore=$Gram->Score($SourceGrams,$RefGrams->{$GramKey});
                  if ($LeftScore >= $BestLeftScore) {
                     $BestLeftScore=$LeftScore;
                     if ($RightScore > $BestRightScore) {
                        $BestRightScore=$RightScore;
                        $BestCandidates={};
                        $BestCandidates->{$GramKey}->{BESTLEFTSCORE}=$BestLeftScore;
                        $BestCandidates->{$GramKey}->{BESTRIGHTSCORE}=$BestRightScore;
                     }
                     elsif ($RightScore == $BestRightScore) {
                        $BestCandidates->{$GramKey}->{BESTLEFTSCORE}=$BestLeftScore;
                        $BestCandidates->{$GramKey}->{BESTRIGHTSCORE}=$BestRightScore;
                     }
                  }
               }
            }
            #Decide if any matches are good enough...
            if (scalar(keys(%$BestCandidates))>0 && $BestLeftScore>0) {
               my $LCSBestScore=0; my $LCSBestCandidate=undef;
               foreach my $BestCandidate (keys (%$BestCandidates)) {
                  my $RefTotal=$Gram->Total($RefGrams->{$BestCandidate});
                  my $RefPc=$BestLeftScore/$RefTotal; #Percentage of Ref that appears in Source
                  my $SourcePc=$BestRightScore/$SourceTotal; #Percentage of Source that appears in Ref
                  if ($RefPc>=$LeftLower && $RefPc<=$LeftUpper && $SourcePc >= $RightLower && $SourcePc <= $RightUpper) {
                     my $LCSScore=$LCS->LCS($UnMatched->{$CTY},$BestCandidate);
                     my $MaxLength=(length($UnMatched->{$CTY}) >= length($BestCandidate)) ? length($UnMatched->{$CTY}) : length($BestCandidate);
                     if ($LCSScore >= $LCSBestScore) {
                        $LCSBestScore=$LCSScore;
                        $LCSBestCandidate=$BestCandidate;
                     }
                  }
               }
               if (defined($LCSBestCandidate)) {
                  my $MaxLength=(length($UnMatched->{$CTY}) >= length($LCSBestCandidate)) ? length($UnMatched->{$CTY}) : length($LCSBestCandidate);   
                  if ($LCSBestScore/$MaxLength >= $LCSThreshold && $MaxLength>=$MaxLengthThreshold) {
                     push (@Matches,$LCSBestCandidate);
                     if ($Debug) {
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{CTY}=$CTY;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{REFTOTAL}=$Gram->Total($RefGrams->{$LCSBestCandidate});
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{SOURCETOTAL}=$SourceTotal;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{REFPC}=$BestCandidates->{$LCSBestCandidate}->{BESTLEFTSCORE};
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{SOURCEPC}=$BestCandidates->{$LCSBestCandidate}->{BESTRIGHTSCORE};;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{LEFTGRAMS}=$BestLeftScore;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{RIGHTGRAMS}=$BestRightScore;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{LCSSCORE}=$LCSBestScore;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{SOURCELENGTH}=length($UnMatched->{$CTY});
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{REFLENGTH}=length($LCSBestCandidate);
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{MAXLENGTH}=$MaxLength;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{LCSPC}=$LCSBestScore/$MaxLength;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{FIRSTLETTERBINDS}=$FirstLetterBinds;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{GRAMALPHABET}=$GramAlphabet;
                        $FuzzyMatches->{$UnMatched->{$CTY}}->{$LCSBestCandidate}->{GRAMLENGTH}=$NGramLength;
                     }
                  }
               }
            }
            if (@Matches) {$Matched->{$CTY}->{$Letter."LIST"}=\@Matches;}
         }
      }
      if (!$Debug) {if (scalar(keys(%$Matched))>0) {$Location->ReturnMatches($Matched,$MarkType); $Success=1;}}
      else {return $FuzzyMatches;}
   }
   return $Success;
}

sub ChooseBestAandP {
   my $Matched=shift;
   my $RefinedMatches={};
   #The below routine forces different A and P entries if possible
   foreach my $Key (keys (%$Matched)) {
      if (defined($Matched->{$Key}->{ALIST}) && defined($Matched->{$Key}->{PLIST})) {
         if (scalar(@{$Matched->{$Key}->{ALIST}})==1 && scalar(@{$Matched->{$Key}->{PLIST}})>1) {
            my $A=${$Matched->{$Key}->{ALIST}}[0];
            undef my @CleanedPList;
            foreach my $P (@{$Matched->{$Key}->{PLIST}}) {
               if ($P ne $A) {push(@CleanedPList,$P);}
            }
            if (scalar(@CleanedPList >0)) {$RefinedMatches->{$Key}->{PLIST}=\@CleanedPList;}
            else {$RefinedMatches->{$Key}->{PLIST}=$Matched->{$Key}->{PLIST};}
            $RefinedMatches->{$Key}->{ALIST}=$Matched->{$Key}->{ALIST};
         }
         elsif (scalar(@{$Matched->{$Key}->{ALIST}})>1 && scalar(@{$Matched->{$Key}->{PLIST}})==1) {
            my $P=${$Matched->{$Key}->{PLIST}}[0];
            undef my @CleanedAList;
            foreach my $A (@{$Matched->{$Key}->{ALIST}}) {
               if ($A ne $P) {push(@CleanedAList,$A);}
            }
            if (scalar(@CleanedAList >0)) {$RefinedMatches->{$Key}->{ALIST}=\@CleanedAList;}
            else {$RefinedMatches->{$Key}->{ALIST}=$Matched->{$Key}->{ALIST};}
            $RefinedMatches->{$Key}->{PLIST}=$Matched->{$Key}->{PLIST};
         }         
         else {$RefinedMatches->{$Key}=$Matched->{$Key};}
      }
      else {$RefinedMatches->{$Key}=$Matched->{$Key};}
   }
   return $RefinedMatches;
}

sub FixMatches {
   my $CleanedMatch={};
   my $Counter=0;
   my $MatchedA=$Location->GetMatched("A");
   my $MatchedP=$Location->GetMatched("P");
   push (my @MatchedAllWithDups,keys(%$MatchedA),keys(%$MatchedP));
   my $MatchedAll={};
   foreach (@MatchedAllWithDups) {$MatchedAll->{$_}=1;}
   foreach my $CTY (keys (%$MatchedAll)) {
      if (defined($MatchedA->{$CTY})){
         my $ALookup={}; my $PLookup; my $BestA=undef;
         if (defined($MatchedP->{$CTY})){
            #A's and P's
            foreach my $AMatch (@{$MatchedA->{$CTY}}) {
               $ALookup->{$AMatch}->{LONG}=$GNS->GetLong($AMatch,"A");
               $ALookup->{$AMatch}->{LAT}=$GNS->GetLat($AMatch,"A");
            }
            foreach my $PMatch (@{$MatchedP->{$CTY}}) {
               $PLookup->{$PMatch}->{LONG}=$GNS->GetLong($PMatch,"P");
               $PLookup->{$PMatch}->{LAT}=$GNS->GetLat($PMatch,"P");
            }
            foreach my $A (keys(%$ALookup)) {
               my $ShortestDist=undef;
               my $BestP=undef;
               foreach my $P (keys(%$PLookup)) {
                  if (!$ShortestDist) {
                     if ((scalar(@{$MatchedP->{$CTY}}) == 1) && (scalar(@{$MatchedA->{$CTY}}) == 1)) {
                        $ShortestDist=0; $BestP=$P; #Save a distance calc
                     }
                     else {
                        $ShortestDist=$GNS->GetDist($ALookup->{$A}->{LAT},$ALookup->{$A}->{LONG},$PLookup->{$P}->{LAT},$PLookup->{$P}->{LONG});
                        $BestP=$P;
                        $Counter++;
                     }
                  }
                  else {
                     my $CurrentDist=$GNS->GetDist($ALookup->{$A}->{LAT},$ALookup->{$A}->{LONG},$PLookup->{$P}->{LAT},$PLookup->{$P}->{LONG});
                     if ($CurrentDist < $ShortestDist) {$ShortestDist=$CurrentDist; $BestP=$P;}
                  }
               }
               $ALookup->{$A}->{SHORTESTDIST}=$ShortestDist;
               $ALookup->{$A}->{BESTP}=$BestP;
            }
            my $ShortestDist=undef;
            foreach my $A (keys(%$ALookup)) {
               if (!$ShortestDist) {$ShortestDist=$ALookup->{$A}->{SHORTESTDIST}; $BestA=$A;}
               else {if ($ALookup->{$A}->{SHORTESTDIST} < $ShortestDist) {$ShortestDist=$ALookup->{$A}->{SHORTESTDIST};$BestA=$A;}}
            }
            $CleanedMatch->{$CTY}->{"P_NAME"}=$ALookup->{$BestA}->{BESTP};
            $CleanedMatch->{$CTY}->{"P_UNI"}=$GNS->GetUNI($ALookup->{$BestA}->{BESTP},"P");
            $CleanedMatch->{$CTY}->{"P_LAT"}=$PLookup->{$ALookup->{$BestA}->{BESTP}}->{LAT};
            $CleanedMatch->{$CTY}->{"P_LONG"}=$PLookup->{$ALookup->{$BestA}->{BESTP}}->{LONG};
         }
         else {
            #A's but no P's
            $BestA=${$MatchedA->{$CTY}}[0];
            $ALookup->{$BestA}->{LAT}=$GNS->GetLat($BestA,"A");
            $ALookup->{$BestA}->{LONG}=$GNS->GetLong($BestA,"A");
         }
         $CleanedMatch->{$CTY}->{"A_NAME"}=$BestA;
         $CleanedMatch->{$CTY}->{"A_UNI"}=$GNS->GetUNI($BestA,"A");
         $CleanedMatch->{$CTY}->{"A_LAT"}=$ALookup->{$BestA}->{LAT};
         $CleanedMatch->{$CTY}->{"A_LONG"}=$ALookup->{$BestA}->{LONG};
      }
      else {
         if (defined ($MatchedP->{$CTY})) {
            if (scalar(@{$MatchedP->{$CTY}}) > 0){
               #P's but no A's
               $CleanedMatch->{$CTY}->{"P_NAME"}=${$MatchedP->{$CTY}}[0];
               $CleanedMatch->{$CTY}->{"P_UNI"}=$GNS->GetUNI($CleanedMatch->{$CTY}->{"P_NAME"},"P");
               $CleanedMatch->{$CTY}->{"P_LAT"}=$GNS->GetLat($CleanedMatch->{$CTY}->{"P_NAME"},"P");
               $CleanedMatch->{$CTY}->{"P_LONG"}=$GNS->GetLong($CleanedMatch->{$CTY}->{"P_NAME"},"P");
            }
         }
      }
   }
   $Location->FileCleanMatches($CleanedMatch);
   return $Counter;
}

sub WriteOutfile {
   my $Outfile=shift;
   my $MatchedList=shift;
   open (OUTFILE,">$Outfile") || die "\nERROR: Can not open output file to write\n";
   my $Header=join("\t",$Location->ReturnData()); 
   print OUTFILE $Header."\n";
   my @Keys=$Location->ReturnKeys($MatchedList);
   for (my $i=0;$i<=$#Keys;$i++) {
      my $Line=join("\t",$Location->ReturnData($Keys[$i]));
      print OUTFILE $Line."\n";
   }
   close (OUTFILE);
   return scalar(@Keys);
}

sub ReconcileForHumans {
   my @SetList=@_;
   my $HumanSet=undef;
   foreach my $Set (@SetList) {
      if (defined $Set) {
         if ($Set) {
            foreach my $SourceKey (keys (%$Set)) {
               foreach my $RefKey (keys (%{$Set->{$SourceKey}})) {
                  if (defined ($HumanSet->{$SourceKey})) {
                     if (defined ($HumanSet->{$SourceKey}->{$RefKey})) {next;}
                     else {$HumanSet->{$SourceKey}->{$RefKey}=$Set->{$SourceKey}->{$RefKey};}
                  }
                  else {$HumanSet->{$SourceKey}->{$RefKey}=$Set->{$SourceKey}->{$RefKey};}
               }
            }
         }
      }
   }
   return $HumanSet;
}

sub WriteHumanChoiceFile {
   my $HumanSet=shift;
   my $FileName=$opt_co."-HumanChoices.txt";
   open (DEBUG,">$FileName") || die "Can't open the Human choice file to write";
   my @Vars=("CTY","REFTOTAL","SOURCETOTAL","REFPC","SOURCEPC","LEFTGRAMS","RIGHTGRAMS","LCSSCORE","SOURCELENGTH","REFLENGTH","MAXLENGTH","LCSPC","FIRSTLETTERBINDS","GRAMALPHABET","GRAMLENGTH");
   print DEBUG "SOURCENAME\tREFNAME\t".join("\t",@Vars)."\n";
   foreach my $SourceName (keys(%$HumanSet)) {
      foreach my $RefName (keys(%{$HumanSet->{$SourceName}})) {
         undef my @Results;
         for (my $i=0;$i<=$#Vars;$i++) {
            push (@Results,$HumanSet->{$SourceName}->{$RefName}->{$Vars[$i]});
         }
         print DEBUG $SourceName."\t".$RefName."\t".join("\t",@Results)."\n";
      }
   }
   close(DEBUG);
}
