Changes

Jump to navigation Jump to search
no edit summary
#!/usr/bin/perl
#Lines that start with a # are comments that aren't read by the interpreter
use strict;
#The strict pragma forces us to declare variables before we use them
use warnings;
#The warnings pragma reports suspicious constructs (it replaces the -w switch)
my @Textfile;
#Declare an array called Textfile
open(my $in, '<', 'Data.txt') or die "Cannot open Data.txt for reading: $!";
#Open a lexical filehandle on our file using the three-argument form of open,
#and stop with the system error message ($!) if the file cannot be opened
while (my $line = <$in>) {
    #Read the data from the filehandle, line by line, into a variable called line
    chomp $line;
    #Remove the trailing newline from the line
    next if $line eq '';
    #If the line is empty move to the next loop iteration. We compare against ''
    #rather than testing for truth so that a line containing only "0" is kept
    push(@Textfile, $line);
    #Push the line onto the Textfile array
}
close $in;
#Close the input filehandle - we have everything we need in memory
my $Doccell;
#Declare the Doccell variable (it stays undefined if we never find a match)
for my $i (0 .. $#Textfile) {
    #Loop over every index of the Textfile array, from 0 to the last index
    if ($Textfile[$i] =~ /^Document\(s\):/) { $Doccell = $i; }
    #Test to see if the entry matches a regular expression, if it does record
    #the index (if several entries match, the last one wins)
}
my @docs;
#Declare the docs array - it stays empty if there was no Document(s): line
if (defined $Doccell) {
    @docs = splice(@Textfile, $Doccell);
    #Create a new array by splicing out everything from the index we just found
    shift @docs;
    #Remove the first element of the docs array (the Document(s): heading itself)
}
my $Firm = shift @Textfile;
#Set Firm equal to the first element of Textfile (shift removes it from the array)
$Firm = '' unless defined $Firm;
#Fall back to an empty string if the file had no lines at all
my $Violation = shift @Textfile;
#Set Violation equal to the (new) first element of Textfile (also removed by shift)
$Violation = '' unless defined $Violation;
#Fall back to an empty string if the file had only one line
my $Offense = {};
#Create an anonymous hash
foreach my $cell (@Textfile) {
    #Iterate over Textfile, setting the current element to cell
    my ($name, @value) = split(/:/, $cell);
    #Split the cell on : - the first piece is the name, the rest is the value
    next unless defined $name;
    #Skip a cell that consists only of colons, as it has no name to store under
    my $value = join(":", @value);
    #Join the value array on : so colons inside the value are restored
    $Offense->{$name} = $value;
    #Set an entry in the Offense hash
}
$Offense->{"DocList"} = \@docs;
#Set the DocList entry in the Offense hash to a reference to the docs array
my $Master = [];
#Define an anonymous array
$Master->[0] = {};
#Define an anonymous hash in the zeroth cell of the anonymous array
$Master->[0]->{FirmName} = $Firm;
#Set a hash entry
$Master->[0]->{Offense} = $Offense;
#Set a hash entry
$Master->[0]->{Violation} = $Violation;
#Set a hash entry
open(my $out, '>', 'Result.txt') or die "Cannot open Result.txt for writing: $!";
#Open a filehandle for writing (overwrite the file if it exists)
print {$out} $Master->[0]->{FirmName};
#Print to the output file an entry from the anonymous hash in the anonymous array
print {$out} "\t";
#Print a tab
print {$out} $Master->[0]->{Violation} . "\t";
#Print another entry with another tab on the end
foreach my $key (sort { $a cmp $b } keys %{ $Master->[0]->{Offense} }) {
    #Iterate through the hash's keys, in alphabetical order, setting the current key to $key
    my $field = $Master->[0]->{Offense}->{$key};
    #Fetch the value stored under this key
    $field = join("; ", @{$field}) if ref($field) eq 'ARRAY';
    #The DocList entry is an array reference: join its elements into one string,
    #otherwise Perl would print something like ARRAY(0x55d1c8) instead of the data
    print {$out} $field . "\t";
    #Print the entry, with a tab
}
print {$out} "\n";
#Print a new line
close $out or die "Cannot finish writing Result.txt: $!";
#Close the output filehandle - this flushes the write buffer, so it is the
#point at which a failed write (e.g. a full disk) is reported
==Modules==
use strict;
use warnings;
#Enable strict and warnings so typos and undefined values are reported
use LWP::UserAgent;
#Use the LWP::UserAgent module
my $ua = LWP::UserAgent->new;
#Create a new UserAgent
my $url = "http://www.contractormisconduct.org/index.cfm/1,73,222,html?CaseID=2";
#Set up a string containing a URL
my $response = $ua->get($url);
#Use the UA 'get' method to retrieve the webpage. This returns an HTTP Response object
die "GET $url failed: " . $response->status_line . "\n" unless $response->is_success;
#Stop here if the request did not succeed, rather than parsing an error page
my $content = $response->decoded_content;
#Get the response as one long text string, so we can work with it...
die "Could not decode the response body from $url\n" unless defined $content;
#decoded_content returns undef when the body cannot be decoded
use HTML::TreeBuilder;
#Use the HTML::TreeBuilder module
my $tree = HTML::TreeBuilder->new; # empty tree
#Create a new tree object
$tree->parse($content);
#Load up the tree from the content string (that we got using UA)
$tree->eof;
#Tell the parser there is no more input, so it can finish building the tree
my $dump = $tree->as_text;
#Dump the text content of the tree as a single string
my $incidentelement = $tree->look_down("id", "primecontent");
#Or use HTML::Element methods to look_down the tree for a tag with some properties
#(here: the first element whose id attribute is "primecontent"; the result is
#undefined if there is no such element). Call $tree->delete once you have
#finished with the tree and its elements to free the memory it uses
#Or use HTML::Element methods to look_down the tree for a tag with some properties
Anonymous user

Navigation menu