#!/usr/bin/perl
#
# Drive a browser through Selenium RC over a list of URLs, hand each rendered
# page to DEiXToBot via a temporary file, and append the extracted records to
# ContentLinks.txt as tab-separated lines (one record per line).
#
# Input : InputURLs.txt, one URL per line (blank lines are ignored).
# Output: ContentLinks.txt, opened in append mode.
# Needs : a Selenium RC server reachable at localhost:4444.

use strict;
use warnings;
use utf8;

use IO::File;
use POSIX ();    # nothing imported: POSIX::tmpnam was removed in Perl 5.26
use File::Temp qw(tempfile);

use DEiXToBot;
use WWW::Selenium;

# --- Configuration ----------------------------------------------------------

my $URL_LIST_FILE = 'InputURLs.txt';
my $OUTPUT_FILE   = 'ContentLinks.txt';
my $MAX_URLS      = 3000;    # upper bound kept from the original 1..3000 loop
my $PATTERN_FILE
    = 'C:\Users\myName\Documents\Privat\MyCase3\Deixto Patterns\ExtractLinksToContent.xml';

# Ordered (regex, party name) pairs; the first regex that matches wins, so the
# order below is significant and mirrors the original if/elsif chain.
my @PARTY_FOR_LOGO = (
    [ qr/ND_Logo/,     "N.D. (New Democracy)" ],
    [ qr/COALITION/,   "SYRIZA Unitary Social Front" ],
    [ qr/PASOK/,       "PA.SO.K. (Panhellenic Socialist Movement)" ],
    [ qr/ANEKS_ELL/,   "ANEXARTITOI ELLINES (Independent Hellenes)" ],
    [ qr/xrisi/,       "LAIKOS SYNDESMOS - CHRYSI AVGI (People's Association - Golden Dawn)" ],
    [ qr/small/,       "DHM.AR (Democratic Left)" ],
    [ qr/KKE/,         "K.K.E. (Communist Party of Greece)" ],
    [ qr/INDEPENDENT/, "INDEPENDENT" ],
);

# --- Helpers ----------------------------------------------------------------

# Read up to $limit non-empty lines from $path and return them as a list of
# URLs. Leading/trailing whitespace (including a CR from Windows line endings)
# and a leading byte-order mark are stripped. Dies if the file cannot be read.
sub read_urls {
    my ($path, $limit) = @_;

    open my $in, '<:encoding(UTF-8)', $path
        or die "Could not open $path: $!\n";

    my @urls;
    while (my $line = <$in>) {
        $line =~ s/\A\x{FEFF}//;
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;
        next if $line eq '';
        push @urls, $line;
        last if @urls >= $limit;
    }
    close $in;

    return @urls;
}

# Translate a logo string into a party name using @PARTY_FOR_LOGO.
# Dies with "<logo> => Unknown logo!" when nothing matches (or the logo is
# undefined), exactly as the original chain did for unmatched values.
sub party_for_logo {
    my ($logo) = @_;

    if (defined $logo) {
        for my $pair (@PARTY_FOR_LOGO) {
            my ($pattern, $party) = @$pair;
            return $party if $logo =~ $pattern;
        }
    }

    my $shown = defined $logo ? $logo : '';
    die "$shown => Unknown logo!\n";
}

# Write $content to a fresh temporary file (UTF-8) and return its path.
# File::Temp creates the file atomically, which replaces the racy tmpnam()
# approach. The caller is responsible for unlinking the file.
sub write_temp_page {
    my ($content) = @_;

    # The .html suffix is presumably what lets the file:// fetch be treated
    # as HTML -- TODO confirm against DEiXToBot's behaviour.
    my ($fh, $name) = tempfile(SUFFIX => '.html');
    binmode $fh, ':encoding(UTF-8)';
    print {$fh} $content or die "Could not write to $name: $!\n";
    close $fh            or die "Could not close $name: $!\n";

    return $name;
}

# --- Main -------------------------------------------------------------------

my @urls = read_urls($URL_LIST_FILE, $MAX_URLS);
die "No URLs found in $URL_LIST_FILE\n" if !@urls;

my $agent = DEiXToBot->new();

my $sel = WWW::Selenium->new(
    host        => "localhost",
    port        => 4444,
    browser     => "*firefox",
    browser_url => "http://www.finanznachrichten.de/",
);
$sel->start;

my $i = 0;
for my $url (@urls) {
    $i++;

    $sel->open($url);
    $sel->wait_for_page_to_load(5000);
    $sel->pause(1);
    print "$i) $url\n";

    # Snapshot the DOM as rendered by the browser and feed it to the agent
    # through a temporary file.
    my $content = $sel->get_html_source();
    my $name    = write_temp_page($content);

    $agent->get("file://$name");
    unlink $name;
    if (!$agent->success) {
        die "Could not fetch the temp file!\n";
    }

    $agent->build_dom();
    $agent->load_pattern($PATTERN_FILE);
    $agent->extract_content();

    if (!$agent->hits) {
        die "Could not find any MPs/ records!\n";
    }

    # NOTE(review): the logo => party mapping below targets Greek political
    # parties and dies on any other value in column 0. Confirm that the
    # pattern loaded above really yields such a logo in the first field;
    # if it does not, this mapping step has to go.
    open my $out, '>>:encoding(UTF-8)', $OUTPUT_FILE
        or die "Could not open $OUTPUT_FILE for appending: $!\n";

    for my $record ($agent->records) {
        my @rec = @$record;

        $rec[0] = party_for_logo($rec[0]);

        # Collapse whitespace runs in the fourth field so that embedded tabs
        # or newlines cannot break the tab-separated output line.
        $rec[3] =~ s#\s+# #g if defined $rec[3];

        print {$out} join("\t", @rec) . "\n"
            or die "Could not write to $OUTPUT_FILE: $!\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Could not close $OUTPUT_FILE: $!\n";

    # TODO: find the "Weiter >>" button on the page and click it until it is
    # no longer present, repeating everything from wait_for_page_to_load
    # onwards for each result page. The button's locator is not known here.
}

$sel->stop;