# # Mark ranges in a string that match certain regular expressions # use 5.010; use strict; use warnings; use List::Util qw/reduce/; use List::MoreUtils qw/part/; use constant { PATTERNS => [ qr/gefall/, qr/alle/, qr/len/ ], MARK_START => '', MARK_END => '' }; sub insert_range(\@$$) { my ($rng, $beg, $end) = @_; my ($dsj, $ovl) = part { ($_->[0] >= $beg and not $_->[0] >= $end) or ($_->[1] >= $beg and not $_->[1] >= $end) or 0; } @{$rng}; $dsj //= [ ]; $ovl //= [ ]; foreach my $_ (@{$ovl}) { $beg = $_->[0] if ($_->[0] < $beg); $end = $_->[1] if ($_->[1] > $end); } @{$rng} = sort { $a->[0] <=> $b->[0]; } @{$dsj}, [ $beg, $end ]; } sub find_ranges($) { my ($str, @pat) = @_; my @rng = (); foreach my $pat (@{PATTERNS()}) { while ($str =~ m/$pat/g) { insert_range @rng, $-[0], $+[0]; } } return @rng; } sub split_ranges($@) { my ($str, @rng) = @_; my ($pos, @pcs) = (0); my $_; foreach (@rng) { if ($pos < $_->[0]) { push @pcs, [ 0, substr $str, $pos, $_->[0] - $pos ]; } push @pcs, [ 1, substr $str, $_->[0], $_->[1] - $_->[0] ]; $pos = $_->[1]; } if ($pos < length($str)) { push @pcs, [ 0, substr $str, $pos, length($str) - $pos ]; } return @pcs; } sub mark_ranges(@) { reduce { if ($b->[0]) { $a . MARK_START . $b->[1] . MARK_END; } else { $a .= $b->[1]; } } '', @_; } my $_; while () { chomp; $_ = mark_ranges split_ranges $_, find_ranges $_; say; } __DATA__ Wir alle sind auf den Witz hereingefallen.