#!/usr/bin/perl
use CGI::Carp qw(fatalsToBrowser);
use strict;
use warnings;
use CGI;
use File::Find;
use HTML::Parser;
# Script body: emit the response header, read the request parameters, collect
# the candidate HTML files below $basedir and print the ones that match.
my $cgi     = CGI->new();
# CGI.pm only recognises named arguments when they start with a dash; without
# it the pair is taken positionally ('type' as content type, 'text/plain' as
# status), which produces a broken header.
print $cgi->header(-type => 'text/plain');
my %params  = $cgi->Vars();
# A request without a "terms" parameter would otherwise hand undef to
# search() and print_found(); treat it as an empty search instead.
$params{terms} = '' unless defined $params{terms};
# Text captured by the text() handler for the file currently being parsed.
my $string  = '';
# Root directory that is scanned recursively for files to search.
my $basedir = '/home/netzgrafik/www.lottermoser.at/test/';
# Filled by find_files() while File::Find walks $basedir.
my @files = ();
find(\&find_files,$basedir);
my %includes = search($params{terms},\@files);
print_found($params{terms},\%includes);
# File::Find "wanted" callback: remember every regular file whose name ends
# in ".htm" by appending its full path to the file-level @files list.
sub find_files {
    my $path = $File::Find::name;
    if ( -f $path && $_ =~ /\.htm$/ ) {
        push @files, $path;
    }
}
# Print the search terms followed by the path of every file that matched.
#
# Parameters:
#   $terms   - the search terms as entered; printed verbatim, undef is
#              printed as an empty string
#   $hashref - hash reference mapping a file path to 'yes' or 'no'
#
# Output goes to the currently selected output handle. Only paths whose
# value is 'yes' are listed, one per line, in sorted order.
sub print_found{
    my ($terms,$hashref) = @_;
    $terms = '' unless defined $terms;
    print $terms," found in:\n";
    # Hash order differs from run to run; sort so the listing is stable.
    foreach my $key (sort keys %$hashref){
        print $key."\n" if($hashref->{$key} eq 'yes');
    }
    return;
}
# Search the given HTML files for any of the supplied terms.
#
# Parameters:
#   $termsstring - whitespace-separated search terms (may be undef or empty)
#   $files       - array reference of HTML file paths to parse
#
# Returns a hash (as a flat list) mapping each parsed file to 'yes' when at
# least one term occurs in the text collected by the parser handlers into the
# file-level $string, and to 'no' otherwise. Without any terms the result is
# empty. Matching is case-sensitive and literal.
sub search{
    my ($termsstring,$files) = @_;

    # split ' ' skips leading whitespace, so no empty term is produced; an
    # empty interpolated pattern would silently reuse the last successful
    # regex instead of matching the term.
    my @terms = split(' ', defined $termsstring ? $termsstring : '');
    return unless @terms;

    # The terms come from the request, so they are matched literally
    # (quotemeta) rather than compiled as regular expressions. Converting and
    # compiling once, outside the file loop, also keeps umlauts() from being
    # re-applied to the same term for every file.
    # NOTE(review): the text handler is registered with "dtext" (entities
    # decoded) while umlauts() rewrites each term - confirm both sides end up
    # in the same representation.
    my $alternation = join('|', map { quotemeta( umlauts($_) ) } @terms);
    my $matcher     = qr/$alternation/;

    my $parser = HTML::Parser->new(
                             api_version => 3,
                             start_h     => [\&start,"self,tagname,attr"],
                             text_h      => [\&text,"self,dtext"],
                             end_h       => [\&end,"self,tagname"]);
    my %include;
    for my $html_file(@$files){
        $string = '';
        # A file that never closes its div would otherwise leave the capture
        # flag set for the next file.
        $parser->{search} = 0;
        $parser->parse_file($html_file);
        $include{$html_file} = ($string =~ $matcher) ? 'yes' : 'no';
    }
    return %include;
}
# HTML::Parser start-tag handler: switch text capturing on when a
# <div class="scroll"> is opened.
#
# Parameters:
#   $self - the parser object; its {search} slot holds the capture flag
#   $tag  - name of the tag that was opened
#   $attr - hash reference of the tag's attributes
sub start{
    my ($self,$tag,$attr) = @_;
    # Many tags carry no class attribute; comparing undef with eq would emit
    # an "uninitialized value" warning for each of them.
    my $class = defined $attr->{class} ? $attr->{class} : '';
    if($tag eq 'div' && $class eq 'scroll'){
        $self->{search} = 1;
    }
    return;
}
# HTML::Parser text handler: while the parser's capture flag is set, append
# the received text to the file-level $string buffer.
sub text {
    my ($parser, $chunk) = @_;
    if ( $parser->{search} ) {
        $string .= $chunk;
    }
}
# HTML::Parser end-tag handler: any closing </div> clears the parser's
# capture flag.
sub end {
    my ($parser, $tagname) = @_;
    $parser->{search} = 0 if $tagname eq 'div';
}
      
# Replace German umlauts and sharp s in a search term with the corresponding
# HTML named entities.
#
# Parameters:
#   $term - the search term
#
# Returns the term with each of the seven characters replaced by its entity;
# all other characters are left untouched.
#
# The characters are written as escapes so the sub does not depend on the
# encoding this source file happens to be saved in.
# NOTE(review): assumes the term holds single-character umlauts (Latin-1
# bytes or decoded characters), not raw UTF-8 byte pairs - confirm against
# the charset the form submits.
sub umlauts{
    my ($term) = @_;
    my %entity_for = (
        "\xE4" => '&auml;',
        "\xC4" => '&Auml;',
        "\xF6" => '&ouml;',
        "\xD6" => '&Ouml;',
        "\xFC" => '&uuml;',
        "\xDC" => '&Uuml;',
        "\xDF" => '&szlig;',
    );
    $term =~ s/([\xC4\xD6\xDC\xDF\xE4\xF6\xFC])/$entity_for{$1}/g;
    return $term;
}