1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
package toc;

use strict;
use warnings;
use 5.020;

use HTML::Parser;

# Most recently generated table of contents. story() stores it here so that
# callers can read it as $toc::toc.
our $toc;

# toc($source)
#
# Scan the HTML in $source for heading tags and return a string containing an
# HTML comment that lists one line per reported heading tag, in the form
# "level N: <raw tag text>". Both start and end tags go through the same
# handler. Matching is case-sensitive, so only the lower-case names given
# to report_tags() are reported.
sub toc {
    my ($source) = @_;

    my @parsed;

    # Shared handler for start and end tags: remember level and raw tag text.
    my $tag_sub = sub {
        my ( $text, $tagname ) = @_;
        if ( $tagname =~ m/\A[hH]([1-6])\z/ ) {
            push @parsed, "level $1: $text";
        }
        return;
    };

    # Create the HTML parser.
    my $p = HTML::Parser->new(
        api_version => 3,
        start_h     => [ $tag_sub, "text,tagname" ],
        end_h       => [ $tag_sub, "text,tagname" ],
    );

    $p->empty_element_tags(1);
    $p->report_tags(qw(h1 h2 h3 h4 h5 h6));
    $p->xml_pic(1);
    $p->utf8_mode(1);
    $p->case_sensitive(1);

    $p->parse($source);
    $p->eof();

    return "\n\n<!--\n" . join( "\n", @parsed ) . "\n-->\n\n";
}

# story(\$story)
#
# Build the table of contents for the referenced text, store it in
# $toc::toc, and replace the first {{{%%TOC%%}}} placeholder in the text
# with "Inhalt:" followed by that table. Always returns 1.
sub story {
    my ($story_ref) = @_;

    $toc = toc::toc( ${$story_ref} );
    ${$story_ref} =~ s/\Q{{{%%TOC%%}}}\E/Inhalt:\n$toc/;

    return 1;
}

1;

package main;

#use toc;

# Demo driver: run the sample document below through toc::story() and print
# the generated table of contents.
my $DATA = join "", <DATA>;
toc::story( \$DATA );
say $toc::toc;

__DATA__
{{{%%TOC%%}}}
<h1>Test 1</H1>
<p>Test</p>
<h2>Test 2</H2>
<p>Test 123</p>
<h2>Test 3</H2>
<p>Test</p>
<h1>Test 4</H1>
<p>Test</p>
<h1>Test 5</H1>
<p>Test</p>
text_h => [ $text_sub, "text" ],
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# toc($source)
#
# Parse the HTML in $source and collect its headings (h1..h6, either case).
#
# Returns a list of hash references, one per heading in document order:
#   level => the digit taken from the tag name,
#   tag   => the tag name exactly as written in the source,
#   text  => the raw (entities not decoded) text found inside the heading,
#            concatenated across any inline markup; "" for an empty heading.
#
# A heading that is never closed is ended by the next heading start tag or
# by the end of the document.
sub toc {
    my $source = shift;
    my @parsed;
    my $data;    # heading currently being collected; undef outside headings

    # Move the heading in progress (if any) onto the result list.
    my $finish_heading = sub {
        if ( defined $data ) {
            push @parsed, $data;
            $data = undef;
        }
        return;
    };

    # Text may arrive in several events (e.g. around <em>...</em>), so it is
    # appended rather than assigned; text outside headings is ignored.
    my $text_sub = sub {
        my ($text) = @_;
        $data->{text} .= $text if defined $data;
        return;
    };

    # Only heading tags are reported, so any end tag closes the heading.
    my $end_sub = sub {
        $finish_heading->();
        return;
    };

    my $tag_sub = sub {
        my ( $text, $tag ) = @_;
        if ( $tag =~ m/\A[hH]([1-6])\z/ ) {
            my $level = $1;
            $finish_heading->();    # previous heading had no end tag
            $data = { level => $level, tag => $tag, text => "" };
        }
        return;
    };

    # Create the HTML parser.
    my $p = HTML::Parser->new(
        api_version => 3,
        start_h     => [ $tag_sub,  "text,tagname" ],
        end_h       => [ $end_sub,  "tagname" ],
        text_h      => [ $text_sub, "text" ],
    );
    $p->empty_element_tags(1);

    # Tag names are compared case-sensitively (see case_sensitive below), so
    # both spellings must be listed or tags such as </H1> are never reported.
    $p->report_tags(qw(h1 h2 h3 h4 h5 h6 H1 H2 H3 H4 H5 H6));
    $p->xml_pic(1);
    $p->utf8_mode(1);
    $p->case_sensitive(1);
    $p->parse($source);
    $p->eof();

    # Flush a heading that was still open when the document ended.
    $finish_heading->();

    return @parsed;
}    ## end sub toc
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#!/usr/bin/perl
use strict;
use warnings;

# Print one "Level: N: text" line for every <hN>...</hN> heading found in the
# HTML stored after __DATA__.

# Slurp the whole DATA section in one read.
my $content = do { local $/ = undef; <DATA> };

# Non-greedy match with /s: a heading may span lines or carry attributes, and
# two headings on the same line are still reported separately. Tag names are
# matched case-insensitively.
my @result;
while ( $content =~ m{<h([1-6])\b[^>]*>(.*?)</h[1-6]\s*>}gis ) {
    push @result, "Level: $1: $2";
}

print "$_\n" for @result;

__DATA__
{{{%%TOC%%}}}
<h1>Test 1</H1>
<p>Test</p>
<h2>Test 2</H2>
<p>Test 123</p>
<h2>Test 3</H2>
<p>Test</p>
<h1>Test 4</H1>
<p>Test</p>
<h3>Test 5</H3>
<h4>Test 6</H4>
<h1>Test 7</H1>
<p>Test</p>
1
2
3
4
5
6
7
Level: 1: Test 1
Level: 2: Test 2
Level: 2: Test 3
Level: 1: Test 4
Level: 3: Test 5
Level: 4: Test 6
Level: 1: Test 7