###########################
#     Match::Gram.pm      #
#      Version 1.01       #
#    (c) Ed Egan 2009     # 
###########################

package Gram;
use strict;
use warnings;

# Constructor.
#   $charset    - character-set code handed to GetCharSet; any false value
#                 (undef, 0, "") selects 1.
#   $gramlength - n-gram length; any false value selects 3.
# Returns the new object with GRAMLENGTH and CHARSETINDEX stored and the
# character set loaded through GetCharSet.
sub new {
    my ($class, $charset, $gramlength) = @_;

    $charset    ||= 1;
    $gramlength ||= 3;

    my $self = bless {
        GRAMLENGTH   => $gramlength,
        CHARSETINDEX => $charset,
    }, $class;

    $self->GetCharSet($charset);
    return $self;
}

# Loads the character set selected by $code into the object:
#   1 = A-Z, 2 = 0-9, 3 = space,
#   4 = alphanumeric, 5 = alphanumeric plus space, 6 = alpha plus space.
# CHARS receives the set as one string and CHARSET the same characters as a
# list. A code outside 1..6 leaves CHARS undefined and CHARSET empty.
sub GetCharSet {
    my ($self, $code) = @_;

    my $alpha  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    my $digits = "0123456789";
    my $space  = " ";

    my %charset_for = (
        1 => $alpha,
        2 => $digits,
        3 => $space,
        4 => $alpha . $digits,             #Alphanumeric
        5 => $alpha . $digits . $space,    #Alphanumeric plus space
        6 => $alpha . $space,              #Alpha plus space
    );

    $self->{CHARS} = $charset_for{$code};
    # Assign through the existing array (rather than replacing the reference)
    # and leave this as the final statement so the sub's value is unchanged.
    @{ $self->{CHARSET} } = split //, $self->{CHARS};
}

# Breaks $string into overlapping n-grams of length GRAMLENGTH.
#   $string - text to decompose; undef is treated as the empty string.
# The text is upper-cased and every character not in the object's CHARS set
# is removed before the grams are cut.
# Returns a hash ref mapping each gram to its number of occurrences. When the
# cleaned text is shorter than GRAMLENGTH the result is { "#" => 1 }: "#" is
# not in any built-in character set, so it never equals a real gram.
# GRAMLENGTH is expected to be a positive integer.
sub GetNGrams {
    my $self   = shift;
    my $string = shift;
    my $length = $self->{GRAMLENGTH};

    # An undefined entry (e.g. a hole in an indexed list) must not raise
    # "uninitialized value" warnings; it simply has no grams.
    my $str = uc(defined $string ? $string : "");

    # Escape the set so characters such as "-", "]" or "^" in CHARS are
    # taken literally inside the bracketed class.
    my $allowed = quotemeta($self->{CHARS});
    $str =~ s/[^$allowed]//ig;

    my $gramset    = {};
    my $last_start = length($str) - $length;
    if ($last_start < 0) {
        $gramset->{"#"} = 1;    #Note the return of the single unmatchable
        return $gramset;
    }

    # Slide a window of $length characters across the cleaned text.
    for my $start (0 .. $last_start) {
        $gramset->{ substr($str, $start, $length) }++;
    }
    return $gramset;
}
    
# Overlap score between two gram-count hashes.
#   $left, $right - hash refs of gram => count.
# For every gram of $left that has a defined count in $right, the smaller of
# the two counts is added. Returns the sum.
sub Score {
    my ($self, $left, $right) = @_;

    my $overlap = 0;
    for my $gram (keys %$left) {
        next unless defined $right->{$gram};

        my $left_count  = $left->{$gram};
        my $right_count = $right->{$gram};
        if ($left_count <= $right_count) {
            $overlap += $left_count;
        }
        else {
            $overlap += $right_count;
        }
    }
    return $overlap;
}

# Adds up every count in a gram-count hash.
#   $left - hash ref of gram => count.
# Returns the sum of the values (0 for an empty hash).
sub Total {
    my ($self, $left) = @_;

    my $sum = 0;
    for my $count (values %$left) {
        $sum += $count;
    }
    return $sum;
}

# Enumerates every possible gram of length GRAMLENGTH over the CHARSET list.
# Stores the grams (array ref) in GRAMSET and sets GRAMCOUNT->{gram} to 0 for
# each of them. Returns the object.
# The list holds |CHARSET| ** GRAMLENGTH entries, so it grows quickly.
sub BuildGramList {
    my $self    = shift;
    my @charset = @{ $self->{CHARSET} || [] };

    # Start from the empty gram and lengthen every gram by one character per
    # pass; with a length below 1 there is nothing to build.
    my @gramset = $self->{GRAMLENGTH} >= 1 ? ('') : ();
    for (1 .. $self->{GRAMLENGTH}) {
        my @longer;
        # Outer loop over the appended character keeps the grams in the
        # order: all grams ending in the first character, then the second...
        for my $char (@charset) {
            push @longer, map { $_ . $char } @gramset;
        }
        @gramset = @longer;
    }

    $self->{GRAMSET} = \@gramset;
    # Hash keys carry no order, so the counters are zeroed without sorting
    # the (potentially very large) gram list first.
    $self->{GRAMCOUNT}->{$_} = 0 for @gramset;
    return $self;
}

# Computes and stores the n-grams of every element of a list.
#   $List        - array ref of strings to index.
#   $IndexLetter - suffix naming the index to fill; defaults to 1.
# Each element's gram hash (from GetNGrams) is saved under
# $self->{"INDEX_<letter>"}{element}, replacing any earlier entry.
# Returns the object.
sub Index {
    my ($self, $List, $IndexLetter) = @_;

    $IndexLetter = 1 unless defined $IndexLetter;
    my $index_name = "INDEX_" . $IndexLetter;

    $self->{$index_name}->{$_} = $self->GetNGrams($_) for @$List;

    return $self;
}

# Fetches the gram hashes for a list of keys, indexing on demand.
#   $lookups     - array ref of keys; undef yields an empty result.
#   $IndexLetter - suffix naming the index to use; defaults to 1.
# A key without a defined entry in $self->{"INDEX_<letter>"} is run through
# GetNGrams and the result is cached there first.
# Returns a hash ref of key => gram hash (the same refs held in the index).
sub Lookup {
    my ($self, $lookups, $IndexLetter) = @_;

    $IndexLetter = 1 unless defined $IndexLetter;
    my $index_name = "INDEX_" . $IndexLetter;

    my %found;
    return \%found unless defined $lookups;

    for my $key (@$lookups) {
        $self->{$index_name}->{$key} = $self->GetNGrams($key)
            unless defined $self->{$index_name}->{$key};
        $found{$key} = $self->{$index_name}->{$key};
    }
    return \%found;
}

# Adds keys to the reverse (gram => keys) index.
#   $ref         - array ref of keys; undef is a no-op.
#   $IndexLetter - suffix naming the index pair to use; defaults to 1.
# A key's gram hash is taken from $self->{"INDEX_<letter>"} when defined
# there, otherwise computed with GetNGrams and cached in that forward index.
# The key is then appended to $self->{"INDEX_REVERSE_<letter>"}{gram} for
# each distinct gram it contains. Nothing is de-duplicated: a key passed
# again is appended again.
# Returns the object.
sub ReverseIndex {
    my ($self, $ref, $IndexLetter) = @_;

    $IndexLetter = 1 unless defined $IndexLetter;
    my $reverse_name = "INDEX_REVERSE_" . $IndexLetter;
    my $forward_name = "INDEX_" . $IndexLetter;

    return $self unless defined $ref;

    for my $key (@$ref) {
        my $gramset = $self->{$forward_name}->{$key};
        unless (defined $gramset) {
            $gramset = $self->GetNGrams($key);
            $self->{$forward_name}->{$key} = $gramset;
        }

        # push creates the list on first use, so a new gram and an already
        # seen gram are handled by the same statement.
        for my $gram (keys %$gramset) {
            push @{ $self->{$reverse_name}->{$gram} }, $key;
        }
    }
    return $self;
}

# Returns the list of keys recorded for a gram in the reverse index.
#   $gram        - gram to look up.
#   $IndexLetter - suffix naming the reverse index; defaults to 1.
# Returns the array ref stored at $self->{"INDEX_REVERSE_<letter>"}{$gram},
# or undef when no defined entry exists (a single undef even in list
# context).
sub ReverseList {
    my ($self, $gram, $IndexLetter) = @_;

    $IndexLetter = 1 unless defined $IndexLetter;
    my $index_name = "INDEX_REVERSE_" . $IndexLetter;

    my $members = $self->{$index_name}->{$gram};
    return defined $members ? $members : undef;
}

1;
