###########################
#    Match::Patent.pm     #
#      Version 1.01       #
#    (c) Ed Egan 2009     # 
###########################

# Patent records loaded from tab-separated files, held in $self->{DATA} and
# keyed by the raw value of each row's "cty" column.
package Patent;
# NOTE(review): the `use warnings` further down appears to switch the
# 'redefine' category back on for the rest of the file - confirm whether this
# ordering is intentional.
no warnings 'redefine';
# Inherit from Common. The assignment sits before `use strict` so the package
# variable can be written without `our`.
# NOTE(review): Common is not loaded in this file - presumably the calling
# script loads it first; verify.
@ISA= qw (Common);
use strict;
use warnings;
# Presumably supplies the PostalCodes class that Parse instantiates - verify.
use Match::PostalCodes;

# Constructor.
#
# Arguments (positional):
#   1. country code - lower-cased and used as the stem of the data file names
#   2. directory    - handed to Load as the location of the data files
#
# Builds the object with its file names and zeroed counters, then loads the
# source/exceptions data (Load) and the stop-word table (LoadStopWords).
# Returns the new object.
sub new {
    my ($class,$iso,$dir)=@_;
    my $code=lc($iso);
    my $self=bless {
        GLOBALS => {
            ISO9660        => $code,
            SOURCEFILE     => $code.".txt",
            EXCEPTIONSFILE => $code."_exceptions.txt",
            STOPWORDSFILE  => "PatentLocations-Stopwords.txt",
        },
        DIR                 => $dir,
        COUNT_MATCHES_LAST  => 0,
        COUNT_MATCHES_TOTAL => 0,
        COUNT_EXPREAD       => 0,
        COUNT_READ          => 0,
    }, $class;
    $self->Load($self->{DIR});
    $self->LoadStopWords;
    return $self;
}

# Private helper: read a tab-separated file and hand every data row to a
# callback.
#
#   $path     - file to read
#   $errmsg   - text to die with (followed by $!) when the file cannot be opened
#   $callback - called as $callback->(\%column_index, \@fields) for each line
#               after the header
#
# The first line that splits into at least one field is taken as the header;
# %column_index maps each header name to its 0-based position (for a repeated
# name the last position wins). A three-argument open with a lexical handle is
# used so that characters in the file name are never interpreted as an open
# mode or a pipe.
sub _ReadTabSeparated {
    my ($path,$errmsg,$callback)=@_;
    open (my $fh,'<',$path) or die $errmsg.$!;
    my @header;
    my %col;
    while (my $line=<$fh>) {
        chomp $line;
        if (!@header) {
            @header=split(/\t/,$line);
            @col{@header}=(0..$#header);
            next;
        }
        my @fields=split(/\t/,$line);
        $callback->(\%col,\@fields);
    }
    close ($fh);
    return;
}

# Load the source file and, if it exists, the exceptions file into
# $self->{DATA}, keyed by the value of each row's "cty" column.
#
# Parameters:
#   $Dir - optional directory containing the files; when false the file names
#          from $self->{GLOBALS} are used as given.
#
# Effects:
#   COUNT_READ / COUNT_EXPREAD are incremented for every data row read (rows
#   without a CTY value included); COUNT_DATA is set to the number of distinct
#   CTY keys after the source file has been read; EXCEPTIONSFILEEXISTS is set
#   to 1 when an exceptions file was found.
#   Source rows store CTY and RAWDATA; exception rows store EXP_DATA, EXP_CITY,
#   EXP_ADM and EXP_POSTCODE.
#
# Returns $self. Dies if a file cannot be opened.
sub Load {
    my $self=shift;
    my $Dir=shift;
    #The Load is robust to changes in the order of the columns and to the inclusion of new columns, but not to renamings
    # NOTE(review): if the header has no "cty" column the index below is undef,
    # which Perl treats as column 0 (with a warning). That behaviour is kept.
    my $sourcefile=$self->{GLOBALS}->{SOURCEFILE};
    my $sourcefilewithpath=$Dir ? "$Dir/$sourcefile" : $sourcefile;
    _ReadTabSeparated($sourcefilewithpath,"Can not find the source file - Is it the module dir?",sub {
        my ($col,$row)=@_;
        $self->{COUNT_READ}++;
        my $cty=$row->[$col->{cty}];
        if (!$cty) {return;} #Must have a CTY field
        $self->{DATA}->{$cty}->{CTY}=$cty;
        $self->{DATA}->{$cty}->{RAWDATA}=$row;
    });
    $self->{COUNT_DATA}=scalar(keys(%{$self->{DATA}}));
    my $exceptionsfile=$self->{GLOBALS}->{EXCEPTIONSFILE};
    my $exceptionsfilewithpath=$Dir ? "$Dir/$exceptionsfile" : $exceptionsfile;
    if (-e $exceptionsfilewithpath) {
        _ReadTabSeparated($exceptionsfilewithpath,"Can not find the exceptions file - Is it the module dir?",sub {
            my ($col,$row)=@_;
            $self->{COUNT_EXPREAD}++;
            my $cty=$row->[$col->{cty}];
            if (!$cty) {return;} #Must have a CTY field
            $self->{DATA}->{$cty}->{EXP_DATA}=$row;
            # Only read the optional columns when the header declares them; an
            # undef index would silently pick up column 0 instead.
            $self->{DATA}->{$cty}->{EXP_CITY}=defined($col->{city}) ? $row->[$col->{city}] : undef;
            $self->{DATA}->{$cty}->{EXP_ADM}=defined($col->{adm}) ? $row->[$col->{adm}] : undef;
            $self->{DATA}->{$cty}->{EXP_POSTCODE}=defined($col->{postcode}) ? $row->[$col->{postcode}] : "";
        });
        $self->{EXCEPTIONSFILEEXISTS}=1;
    }
    return $self;
}

# Look up one of the object's counters.
#
#   $var - counter name without the "COUNT_" prefix (e.g. "READ")
#
# Returns the counter value, or the string "ERROR" when no such counter is
# defined on the object.
sub GetCount {
    my ($self,$var)=@_;
    my $slot="COUNT_".$var;
    return defined($self->{$slot}) ? $self->{$slot} : "ERROR";
}

# Convenience wrapper: run CleanFields and then Parse on this object.
# Returns $self.
sub CleanAndParse {
    my ($self)=@_;
    foreach my $step ("CleanFields","Parse") {
        $self->$step();
    }
    return $self;
}

# Normalise the exception fields of every record.
#
# For each record, EXP_CITY, EXP_ADM and EXP_POSTCODE (when defined) are run
# through $self->CleanString; EXP_CLEANED is set to 1 if any of them changed.
# When at least one of EXP_CITY / EXP_ADM is defined, EXP_LIST receives the
# defined values (city first) and EXP_STR the same values joined with "#".
# Returns $self.
sub CleanFields {
    my ($self)=@_;
    my @cleanlist=qw(EXP_CITY EXP_ADM EXP_POSTCODE);
    foreach my $CTY (keys (%{$self->{DATA}})) {
        my $rec=($self->{DATA}->{$CTY} ||= {});
        foreach my $unit (@cleanlist) {
            next if !defined($rec->{$unit});
            # Clean a copy so the stored value can be compared with the result.
            my $cleaned=$rec->{$unit};
            $cleaned=$self->CleanString($cleaned);
            $rec->{EXP_CLEANED}=1 if $cleaned ne $rec->{$unit};
            $rec->{$unit}=$cleaned;
        }
        my @parts=map {defined($rec->{$_}) ? ($rec->{$_}) : ()} ("EXP_CITY","EXP_ADM");
        if (@parts) {
            $rec->{EXP_LIST}=\@parts;
            $rec->{EXP_STR}=join("#",@parts);
        }
    }
    return $self;
}

# Derive the cleaned form of every CTY key.
#
# For each record (keyed by the original CTY string):
#   * the string is passed to PostalCodes::ExtractPostalCode; a defined postal
#     code is stored as PRS_POSTCODE and the returned remainder is used from
#     then on,
#   * stop words are substituted via ReplaceStopWords,
#   * a comma-separated string sets CTY_COMMASEP, is split on commas, each part
#     is cleaned with CleanString and the non-empty parts are stored as
#     CTY_LIST (array) and CTY_STR ("#"-joined),
#   * CTY_CLEAN receives the final cleaned string, and CTY_CLEANED is set to 1
#     when it differs from the original key.
# Returns $self.
sub Parse {
    my $self=shift;
    my $PostcodeMethod=PostalCodes->new(uc($self->{GLOBALS}->{ISO9660}));
    foreach my $CTY (keys (%{$self->{DATA}})) {
        my $OrigCTY=$CTY;
        my $PostCode=undef;
        ($PostCode,$CTY)=$PostcodeMethod->ExtractPostalCode($CTY);
        if (defined ($PostCode)) {$self->{DATA}->{$OrigCTY}->{PRS_POSTCODE}=$PostCode;}
        $CTY=$self->ReplaceStopWords($CTY);
        if ($CTY=~/\,/) {
            $self->{DATA}->{$OrigCTY}->{CTY_COMMASEP}=1;
            my @temp=split(",",$CTY);
            undef my @cleanedtemp;
            foreach my $unit (@temp) {
                $unit=$self->CleanString($unit);
                if (defined($unit) && $unit ne "") {push (@cleanedtemp,$unit);}
            }
            if (scalar(@cleanedtemp) > 0) {
                $self->{DATA}->{$OrigCTY}->{CTY_LIST}=\@cleanedtemp;
                $self->{DATA}->{$OrigCTY}->{CTY_STR}=join("#",@{$self->{DATA}->{$OrigCTY}->{CTY_LIST}});
                $CTY=$self->{DATA}->{$OrigCTY}->{CTY_STR};
                $CTY=~s/#/ /g;
                # Trim both ends, then collapse EVERY run of whitespace; without
                # /g only the first run in the string was collapsed.
                $CTY=~s/^\s{1,}//; $CTY=~s/\s{1,}$//; $CTY=~s/\s{1,}/ /g;
            }
            else {$CTY=$self->CleanString($CTY);}
        }
        else {$CTY=$self->CleanString($CTY);}
        $self->{DATA}->{$OrigCTY}->{CTY_CLEAN}=$CTY;
        if ($OrigCTY ne $CTY) {$self->{DATA}->{$OrigCTY}->{CTY_CLEANED}=1;}
    }
    return $self;
}

# Substitute stop words in a string and normalise its whitespace.
#
#   $str - the string to process
#
# Each key of $self->{STOPWORDS} is replaced (first occurrence only, and only
# where it is delimited by start/end of string, whitespace or a non-word
# character) by its mapped value. The marker NEAR is then removed in the same
# way. Finally every run of whitespace is collapsed to a single space and one
# leading/trailing whitespace character is stripped.
# Returns the processed string.
#
# NOTE(review): stop words are interpolated into the pattern as-is, so regex
# metacharacters in the stop-word table act as regex syntax - confirm that is
# intended before adding quoting.
sub ReplaceStopWords {
    my $self=shift;
    my $str=shift;
    # Sorted so the outcome does not depend on hash ordering when two stop
    # words can affect the same text.
    foreach my $stopword (sort(keys(%{$self->{STOPWORDS}}))) {
        my $replacement=$self->{STOPWORDS}->{$stopword};
        if (!defined($replacement)) {$replacement="";}
        $str=~s/(^|\s|\W)$stopword(\s|\W|$)/$1$replacement$2/;
    }
    $str=~s/(^|\s|\W)NEAR(\s|\W|$)/$1$2/; #For the moment throw out the NEAR marker
    # /g is required here: without it only the first run of whitespace in the
    # string was collapsed.
    $str=~s/\s{1,}/ /g; $str=~s/^\s//; $str=~s/\s$//;
    return $str;
}

# Read the stop-word table into $self->{STOPWORDS}.
#
# The file named by $self->{GLOBALS}->{STOPWORDSFILE} is read line by line;
# each line is split on tabs and its first field becomes a key whose value is
# the second field (undef when the line has no tab). Lines without a key -
# blank lines or lines starting with a tab - are skipped so that they do not
# register an empty stop word.
# Returns $self. Dies if the file cannot be opened.
sub LoadStopWords {
    my $self=shift;
    my $stopfile=$self->{GLOBALS}->{STOPWORDSFILE};
    # Three-argument open with a lexical handle: the name is always treated as
    # a plain file to read and the handle is not a package global.
    open (my $fh,'<',$stopfile) or die "Can't open the Stopwords file to read".$!;
    while (my $line=<$fh>) {
        chomp $line;
        my ($key,$value)=split("\t",$line);
        if (!defined($key) || $key eq "") {next;}
        $self->{STOPWORDS}->{$key}=$value;
    }
    close ($fh);
    return $self;
}

# Collect the records that have not been matched yet.
#
#   $type - name of the record field to return (records where this field is
#           undefined are ignored)
#
# Resets COUNT_MATCHES_LAST to 0. Returns a hash reference mapping each
# unmatched CTY key to the value of its $type field.
sub GetUnMatched {
    my ($self,$type)=@_;
    $self->{COUNT_MATCHES_LAST}=0;
    my @pending=grep {
        defined($self->{DATA}->{$_}->{$type}) && !defined($self->{DATA}->{$_}->{MATCHED})
    } keys(%{$self->{DATA}});
    my %UnMatched=map {($_ => $self->{DATA}->{$_}->{$type})} @pending;
    return \%UnMatched;
}

# Collect the candidate lists of the records that are already matched.
#
#   $Letter - prefix of the list field to return; the field looked up is
#             $Letter."LIST" (ReturnMatches stores ALIST and PLIST)
#
# Returns a hash reference mapping each matched CTY key that has the requested
# list defined to that list.
sub GetMatched {
    my ($self,$Letter)=@_;
    my $field=$Letter."LIST";
    my @hits=grep {
        defined($self->{DATA}->{$_}->{MATCHED}) && defined($self->{DATA}->{$_}->{$field})
    } keys(%{$self->{DATA}});
    my %Matched=map {($_ => $self->{DATA}->{$_}->{$field})} @hits;
    return \%Matched;
}

# Copy match details from a working set into the object's records.
#
#   $Working - hash reference keyed by CTY; each value may carry A_* and/or
#              P_* fields
#
# For each prefix (A, P): when <prefix>_NAME is defined in the working entry,
# the record's <prefix> flag is set to 1 and the NAME, UNI, LAT and LONG
# fields for that prefix are copied across (undefined values included).
# Returns $self.
sub FileCleanMatches {
    my ($self,$Working)=@_;
    foreach my $CTY (keys (%$Working)) {
        foreach my $prefix ("A","P") {
            next unless defined($Working->{$CTY}->{$prefix."_NAME"});
            $self->{DATA}->{$CTY}->{$prefix}=1;
            foreach my $suffix ("NAME","UNI","LAT","LONG") {
                my $field=$prefix."_".$suffix;
                $self->{DATA}->{$CTY}->{$field}=$Working->{$CTY}->{$field};
            }
        }
    }
    return $self;
}

# Record a batch of matches on the object.
#
#   $Matched   - hash reference keyed by CTY; each value may carry ALIST
#                and/or PLIST
#   $MatchType - label stored as MATCH_TYPE on every record in the batch
#
# Every key in $Matched is flagged MATCHED=1 and given the MATCH_TYPE; defined
# ALIST / PLIST values are copied onto the record. COUNT_MATCHES_LAST is set to
# the size of the batch and COUNT_MATCHES_TOTAL is recomputed.
# Returns $self.
sub ReturnMatches {
    my ($self,$Matched,$MatchType)=@_;
    my @batch=keys(%$Matched);
    foreach my $CTY (@batch) {
        $self->{DATA}->{$CTY}->{MATCHED}=1;
        $self->{DATA}->{$CTY}->{MATCH_TYPE}=$MatchType;
        foreach my $list ("ALIST","PLIST") {
            next unless defined($Matched->{$CTY}->{$list});
            $self->{DATA}->{$CTY}->{$list}=$Matched->{$CTY}->{$list};
        }
    }
    $self->{COUNT_MATCHES_LAST}=scalar(@batch);
    $self->ComputeTotalMatches;
    return $self;
}

# Recount the records flagged as MATCHED and store the result in
# COUNT_MATCHES_TOTAL. Returns $self.
sub ComputeTotalMatches {
    my ($self)=@_;
    my @matched=grep {defined($self->{DATA}->{$_}->{MATCHED})} keys(%{$self->{DATA}});
    $self->{COUNT_MATCHES_TOTAL}=scalar(@matched);
    return $self;
}

# Return one output row for a record, or the header row.
#
#   $CTY - key of the record to report; when false (omitted, "", 0) the list
#          of column names is returned instead
#
# Returns a list with one value per column, in the fixed column order below;
# undefined fields are reported as "". An unknown key yields a row of empty
# strings and - unlike a plain nested lookup - does not create an empty record
# in $self->{DATA} as a side effect.
sub ReturnData {
    my $self=shift;
    my $CTY=shift;
    my @Vars=("CTY","CTY_STR","EXP_CITY","EXP_ADM","EXP_POSTCODE","EXP_STR","PRS_POSTCODE","A_NAME","A_UNI","A_LAT","A_LONG","P_NAME","P_UNI","P_LAT","P_LONG","MATCH_TYPE");
    if (!$CTY) {return @Vars;} #Return the header
    # Fetch the record without dereferencing through a missing key, which
    # would autovivify it and make it appear in later key listings.
    my $Record=exists($self->{DATA}->{$CTY}) ? $self->{DATA}->{$CTY} : undef;
    my @Return;
    foreach my $Var (@Vars) {
        if (defined($Record) && defined($Record->{$Var})) {
            push (@Return,$Record->{$Var});
        }
        else {push (@Return,"");}
    }
    return @Return;
}

# List record keys in ascending string order.
#
#   $Type - optional filter. When false, every key is returned. Otherwise a
#           key is returned when the record has a defined field named $Type,
#           or when its MATCH_TYPE equals $Type.
#
# Returns the sorted list of keys.
sub ReturnKeys {
    my ($self,$Type)=@_;
    my @Keys=keys(%{$self->{DATA}});
    if ($Type) {
        @Keys=grep {
            defined($self->{DATA}->{$_}->{$Type})
                || (defined($self->{DATA}->{$_}->{MATCH_TYPE})
                    && $self->{DATA}->{$_}->{MATCH_TYPE} eq $Type)
        } @Keys;
    }
    return (sort {$a cmp $b} @Keys);
}

1;
