#!/usr/bin/perl
#
# CGI search script.
#
# Walks $basedir for files whose name ends in ".htm", extracts the text found
# inside <div class="scroll"> elements of each file, and prints (as plain text)
# the paths of all files whose extracted text contains at least one of the
# whitespace-separated search terms passed in the "terms" CGI parameter.

use CGI::Carp qw(fatalsToBrowser);
use strict;
use warnings;
use CGI;
use File::Find;
use HTML::Parser;

my $cgi = CGI->new();

# CGI.pm only treats arguments as named when they start with a dash; without
# it the pair is taken positionally (content type "type", status "text/plain").
print $cgi->header(-type => 'text/plain');

my %params = $cgi->Vars();

# Text collected by the parser callbacks for the file currently being parsed.
my $string  = '';
my $basedir = '/home/netzgrafik/www.lottermoser.at/test/';
my @files   = ();

# A request without a "terms" parameter is treated as an empty query.
my $query = defined $params{terms} ? $params{terms} : '';

find(\&find_files, $basedir);
my %includes = search($query, \@files);
print_found($query, \%includes);

# File::Find callback: records the full path of every plain file whose name
# ends in ".htm" in the file-level @files array.
sub find_files {
    # File::Find has chdir'ed into the containing directory, so $_ (the
    # basename) is the right thing to stat.
    push @files, $File::Find::name if -f $_ && $_ =~ /\.htm$/;
    return;
}

# Prints the search terms followed by the matching file paths, one per line.
#
#   $terms   - the raw query string, echoed back in the heading
#   $hashref - reference to a hash mapping file path => 'yes' | 'no'
#
# Only entries whose value is 'yes' are printed; paths are sorted so the
# output does not depend on hash ordering.
sub print_found {
    my ($terms, $hashref) = @_;
    $terms = '' unless defined $terms;

    print $terms, " found in:\n";
    foreach my $key (sort keys %$hashref) {
        print $key . "\n" if $hashref->{$key} eq 'yes';
    }
    return;
}

# Searches the given HTML files for any of the given terms.
#
#   $termsstring - whitespace-separated search terms (undef is treated as '')
#   $files       - array reference of file paths to parse
#
# Returns a hash (as a list) mapping each file path to 'yes' when the text
# inside its <div class="scroll"> elements contains at least one term as a
# literal, case-sensitive substring, and to 'no' otherwise. When there are no
# terms at all the returned hash is empty.
sub search {
    my ($termsstring, $files) = @_;
    $termsstring = '' unless defined $termsstring;

    # split ' ' discards leading whitespace, so a query such as " foo" does
    # not produce an empty term (which would match every file). The umlaut
    # mapping is applied once here rather than once per file.
    my @terms = map { umlauts($_) } split ' ', $termsstring;

    my %include;
    return %include unless @terms;

    my $parser = HTML::Parser->new(
        api_version => 3,
        start_h     => [\&start, "self,tagname,attr"],
        text_h      => [\&text,  "self,dtext"],
        end_h       => [\&end,   "self,tagname"],
    );

    for my $html_file (@$files) {
        # Reset per-file state so an unclosed <div> in one file cannot make
        # the parser capture text from the start of the next one.
        $string = '';
        $parser->{search} = 0;

        # parse_file returns undef when the file cannot be opened; the file
        # is then reported as a non-match instead of being silently parsed
        # as empty.
        unless (defined $parser->parse_file($html_file)) {
            warn "Cannot parse $html_file: $!\n";
        }

        my $found = 'no';
        foreach my $term (@terms) {
            # Literal substring test: the terms come from the request and
            # must not be interpreted as regular expressions.
            if (index($string, $term) >= 0) {
                $found = 'yes';
                last;
            }
        }
        $include{$html_file} = $found;
    }
    return %include;
}

# HTML::Parser start-tag handler. $self->{search} holds the <div> nesting
# depth relative to the enclosing <div class="scroll"> (0 = not capturing).
sub start {
    my ($self, $tag, $attr) = @_;
    return unless $tag eq 'div';

    if ($self->{search}) {
        # Nested <div> inside the scroll container: track it so that its
        # closing tag does not end the capture prematurely.
        $self->{search}++;
    }
    elsif (defined $attr->{class} && $attr->{class} eq 'scroll') {
        $self->{search} = 1;
    }
    return;
}

# HTML::Parser text handler: appends decoded text to $string while capturing.
sub text {
    my ($self, $dtext) = @_;
    $string .= $dtext if $self->{search};
    return;
}

# HTML::Parser end-tag handler: leaves one level of <div> nesting; capture
# stops when the scroll container itself is closed.
sub end {
    my ($self, $tag) = @_;
    if ($tag eq 'div' && $self->{search}) {
        $self->{search}--;
    }
    return;
}

# Applies a fixed series of literal substitutions to a search term and
# returns the result.
#
# NOTE(review): the patterns below look mis-encoded (the replacements are
# German umlauts / sharp s followed by ';'). They are kept byte-for-byte;
# confirm the intended characters against the original file encoding.
sub umlauts {
    my ($term) = @_;
    $term =~ s/&”/ä\;/g;
    $term =~ s/¾/Ä\;/g;
    $term =~ s/&–/ö\;/g;
    $term =~ s/÷/Ö\;/g;
    $term =~ s/&¸/ü\;/g;
    $term =~ s/Ð/Ü\;/g;
    $term =~ s/þ/ß\;/g;
    return $term;
}