#!/usr/bin/perl

use forks;
# Alternatively "use threads";
# I think forks gives better multiprocessor support here.

use strict;
use warnings;

# Count the matches of $regexp in $file.
#
# The file is read in pieces of $chuncksize; each piece is handed to a worker
# (see get_thread/parse) and the per-piece counts are summed. At most
# $threads workers exist at any time.

my $shared=100;              # overlap carried from one chunk into the next, in characters
my $chuncksize=10*1024*1024; # amount requested per read: 10 MB

my $file='/home/topeg/test.random.txt';
my $regexp=qr/--TEST--/o;

# At most 4 processes; at 10 MB per process that makes 40 MB...
my $threads=4;

open(my $trace_fh, '<', $file) or die "cannot open $file $!\n";

my $found=0;
my $chunk;
my $old="";

# Workers that have been started but not yet joined, oldest first.
my @running;

while (1)
{
  # read() returns undef on error and 0 at end of file; tell them apart so
  # that an I/O error is not reported as a (too small) result.
  my $got=read($trace_fh, $chunk, $chuncksize);
  die "cannot read $file $!\n" unless(defined($got));
  last unless($got);

  # Prepend the tail of the previous chunk so that a match lying across the
  # read boundary is still seen by one worker.
  $chunk=$old.$chunk;

  # Pool is full: wait for the oldest worker and collect its count before
  # starting the next one.
  if(@running >= $threads)
  { $found+=shift(@running)->join(); }
  push(@running,get_thread($chunk));

  # Keep the last $shared characters for the next round. Matches that lie
  # completely inside this tail are counted by the worker just started, so
  # they are removed here to avoid counting them a second time.
  # NOTE(review): removing a match joins the text on either side of it,
  # which could in principle form a new match - confirm this is acceptable
  # for the patterns in use.
  $old = substr($chunk,-$shared,$shared);
  $old =~ s/$regexp//gs;
}

close($trace_fh);

# Wait for the remaining workers. Only workers that were actually started
# are joined, so files that produce fewer than $threads chunks (or none at
# all) are handled as well.
while(@running)
{ $found+=shift(@running)->join(); }

print "anzahl treffer: $found\n";

exit(0);
###############################################
# Create a thread/process.
#
# get_thread($chunk)
#   Starts a worker via threads->create that runs parse() on $chunk.
#   Returns the worker object; the caller collects the result with ->join().
#   Dies if threads->create does not return a defined value.
sub get_thread
{
  # $_[0] is passed on directly so the chunk is not copied here.
  my $worker=threads->create(\&parse, $_[0]);
  defined($worker) or die "error create thread";
  return $worker;
}

# Do the actual work.
#
# parse($chunk)
#   Counts how often the file-level $regexp matches in $chunk, advancing
#   through the string with a global (/g) match.
#   Returns the number of matches.
sub parse
{
  my ($text)=@_;
  my $hits=0;
  while($text =~ m/$regexp/gsc)
  {
    $hits+=1;
  }
  return $hits;
}