#!/usr/bin/perl
use CGI::Carp qw(fatalsToBrowser);
use strict;
use warnings;
use CGI;
use File::Find;
use HTML::Parser;
# --- Request handling ------------------------------------------------------
# Emit the response header first so anything printed afterwards (including
# CGI::Carp's fatalsToBrowser output) is served as plain text.
my $cgi = CGI->new();
print $cgi->header(type => 'text/plain');
my %params = $cgi->Vars();

# A request without a "terms" field leaves $params{terms} undefined. Normalise
# it to an empty string once, so search() and print_found() never see undef.
my $query = defined $params{terms} ? $params{terms} : '';

# Text collected by the parser's text handler for the file currently being
# scanned; search() clears it before each file.
my $string = '';

# Root of the directory tree that is scanned for .htm files.
my $basedir = '/home/netzgrafik/www.lottermoser.at/test/';

# Paths collected by the find_files() callback.
my @files = ();
find(\&find_files,$basedir);

my %includes = search($query,\@files);
print_found($query,\%includes);
# File::Find callback. Appends the full path of the current entry to the
# file-level @files list when the entry is a plain file whose name (as found
# in $_) ends in ".htm".
sub find_files{
    my $path = $File::Find::name;
    my $is_htm_file = (-f $path) && ($_ =~ /\.htm$/);
    push(@files,$path) if($is_htm_file);
}
# Print the search report to the currently selected output handle.
#
# Parameters:
#   $terms   - the query string as entered by the user; undef is printed as
#              an empty string instead of triggering a warning
#   $hashref - hash ref mapping a file path to 'yes' or 'no'
#
# Output: one header line "<terms> found in:" followed by one line per path
# whose value is 'yes'. Paths are printed in sorted order so the report is
# stable between requests (plain hash order is randomised by Perl).
sub print_found{
    my ($terms,$hashref) = @_;
    $terms = '' unless defined $terms;
    print $terms," found in:\n";
    for my $path (sort keys %$hashref){
        my $flag = $hashref->{$path};
        # Entries without a value are treated like 'no'.
        print $path."\n" if(defined $flag && $flag eq 'yes');
    }
}
# Scan HTML files for a set of search terms.
#
# Parameters:
#   $termsstring - whitespace-separated search terms (undef or empty means
#                  "no query")
#   $files       - array ref of paths of the HTML files to scan
#
# Returns a hash (as a list) mapping every scanned path to 'yes' when at
# least one term occurs in the text the parser handlers collected into the
# file-level $string, and to 'no' otherwise. With no terms, nothing is
# scanned and the result is empty.
#
# Terms are matched as literal, case-sensitive substrings. They come straight
# from the request, so they must never be compiled as regular expressions.
sub search{
    my ($termsstring,$files) = @_;
    $termsstring = '' unless defined $termsstring;

    # split ' ' skips leading whitespace, so no empty term is produced.
    # umlauts() is applied exactly once per term here: doing it inside the
    # per-file loop would rewrite the same term again for every file, and
    # umlauts() is not guaranteed to be idempotent.
    my @terms = grep { length } map { umlauts($_) } split(' ',$termsstring);
    return () unless @terms;

    my $parser = HTML::Parser->new(
        api_version => 3,
        start_h => [\&start,"self,tagname,attr"],
        text_h => [\&text,"self,dtext"],
        end_h => [\&end,"self,tagname"]);

    my %include;
    for my $html_file (@$files){
        # Fresh state per file: clear the collected text and the handlers'
        # "inside the searched region" marker, so an unclosed div in one
        # file cannot leak into the next.
        $string = '';
        $parser->{search} = 0;

        # Best effort: a file that cannot be parsed leaves $string empty
        # and is reported as 'no'.
        $parser->parse_file($html_file);

        my $found = 0;
        for my $term (@terms){
            if(index($string,$term) >= 0){
                $found = 1;
                last;
            }
        }
        $include{$html_file} = $found ? 'yes' : 'no';
    }
    return %include;
}
# HTML::Parser handlers. Together they collect the text found inside
# <div class="scroll"> into the file-level $string.
#
# The parser object's {search} slot holds the current div nesting depth
# inside the scroll region: false (0 or undef) means "outside", 1 means
# directly inside the scroll div, 2 and up means inside a div nested in it.
# Counting depth keeps the region open until the scroll div's own closing
# tag, rather than ending at the first </div> of any nested div.

# Start-tag handler.
#   $self - the parser object (carries the {search} depth)
#   $tag  - tag name
#   $attr - hash ref of the tag's attributes
sub start{
    my ($self,$tag,$attr) = @_;
    return unless $tag eq 'div';
    if($self->{search}){
        # A div nested inside the scroll region: go one level deeper.
        $self->{search}++;
    }
    elsif(defined $attr->{class} && $attr->{class} eq 'scroll'){
        # defined-check: a div without a class attribute must not warn.
        $self->{search} = 1;
    }
}

# Text handler: appends the text chunk to $string while inside the region.
#   $self  - the parser object
#   $dtext - text chunk passed by the parser
sub text{
    my ($self,$dtext) = @_;
    $string .= $dtext if($self->{search});
}

# End-tag handler: leaves one div level. The region closes when the depth
# returns to 0; closing divs seen outside the region are ignored.
#   $self - the parser object
#   $tag  - tag name
sub end{
    my ($self,$tag) = @_;
    if($tag eq 'div' && $self->{search}){
        $self->{search}--;
    }
}
# Runs a fixed sequence of global substitutions over a search term and
# returns the rewritten copy. The argument is copied into $term first, so the
# caller's variable is not modified.
#
# NOTE(review): the pattern and replacement literals below look mis-encoded
# (mojibake). Presumably each line once mapped a German umlaut or sharp s to
# a replacement such as an HTML entity -- TODO confirm against a correctly
# encoded copy of this file and restore the literals. As the bytes appear
# here, the substitutions do not do anything obviously meaningful.
sub umlauts{
my ($term) = @_;
$term=~ s/&â/ä\;/g;
$term=~ s/¾/Ã\;/g;
# NOTE(review): as shown here this pattern is identical to the first one, so
# it can no longer match at this point -- likely encoding damage.
$term=~ s/&â/ö\;/g;
$term=~ s/÷/Ã\;/g;
$term=~ s/&¸/ü\;/g;
# NOTE(review): as shown here the replacement contains the pattern text, so
# running umlauts() again on its own output keeps appending ';' -- verify.
$term=~ s/Ã/Ã\;/g;
$term=~ s/þ/Ã\;/g;
return $term;
}