#!/usr/bin/perl -w

# Extract all plain text from an HTML file

use strict;
use HTML::Parser 3.00 ();

my %inside;

# Handler shared by the parser's "start" and "end" events.
#
# Arguments (as set up by the handler argspecs "tagname, '+1'" and
# "tagname, '-1'"):
#   $tag - name of the element whose tag was just seen
#   $num - '+1' for a start tag, '-1' for an end tag
#
# Maintains a per-element nesting count in %inside so that text() can tell
# whether the parser is currently within a <script> or <style> element, and
# prints a single space so that text on either side of a tag does not run
# together.
sub tag
{
    my($tag, $num) = @_;
    $inside{$tag} += $num;

    # A stray end tag must not push the count below zero: a negative count is
    # true in boolean context, so text() would drop the rest of the document.
    $inside{$tag} = 0 if $inside{$tag} < 0;

    print " ";  # XXX: emitted for every tag, although not all tags need it
}

# Handler for the parser's "text" event.
#
# Arguments:
#   $text - the text chunk, delivered according to the "dtext" argspec
#
# Prints the chunk to STDOUT unless %inside shows that we are currently
# within a <script> or <style> element, whose content is not plain text.
sub text
{
    my($text) = @_;
    return if $inside{script} || $inside{style};
    print $text;
}

# The file to convert is the first command-line argument.
my $file = shift;
die "Usage: $0 <html-file>\n" unless defined $file;

# Build a version-3 API parser that routes start/end tags to tag() (with a
# +1/-1 nesting delta) and text to text(), then run it over the file.
# parse_file() returns a false value when the file cannot be opened.
HTML::Parser->new(api_version     => 3,
                  handlers        => [start => [\&tag,  "tagname, '+1'"],
                                      end   => [\&tag,  "tagname, '-1'"],
                                      text  => [\&text, "dtext"],
                                     ],
                  marked_sections => 1,
                 )->parse_file($file)
    or die "Can't open file: $!\n";