Thread Datei mit x Zeilen und 5 Spalten auslesen | wget $1 $2 $3 usw.
(25 answers)
Opened by guest newbie at 2009-09-24 11:34
Leider ist das LWP-Paket ziemlich umfangreich und hat viele Abhängigkeiten zu anderen Modulen, aber du solltest es einfach auf den Server kopieren können, da es nur Perl ist.
Du könntest schauen, ob "LWP::UserAgent" installiert ist. Code: (dl
)
perl -MLWP::UserAgent -e 1 Wenn nicht: Ich hatte mir mal ein ganz einfaches Modul geschrieben, um Webseiten zu laden. Es hat zwar einige Einschränkungen, aber möglicherweise funktioniert es für dich. Ein Anwendungsbeispiel: test_miniwget.pl Code (perl): (dl
)
#!/usr/bin/perl
# test_miniwget.pl - fetch the URL given as the first command line argument
# with miniwget. Prints the body on success, otherwise a diagnostic dump
# (message, status, host, port, path, headers, body).
use strict;
use warnings;
use Data::Dumper;
use miniwget;

my $url = shift(@ARGV);

# miniwget->new() only issues a request when it is handed a true URL, so
# without one there is nothing meaningful to report: bail out with a usage
# line instead of dumping an object that was never loaded.
die "usage: $0 URL\n" unless ($url);

my $site = miniwget->new($url);
my $rule = "#" x 80 . "\n";

# host/port/path are still undefined when the URL could not be parsed;
# map undef to '' so the dump does not emit "uninitialized" warnings.
sub _text
{
  my $value = shift;
  return defined($value) ? $value : '';
}

if ($site->success())
{
  print $rule;
  print $site->body();
  print $rule;
}
else
{
  print $rule;
  print "MESSAGE:" . _text($site->message()) . "\n";
  print "STATUS:" . _text($site->status()) . "\n";
  print $rule;
  print "HOST:" . _text($site->host()) . "\n";
  print "PORT:" . _text($site->port()) . "\n";
  print "PATH:" . _text($site->path()) . "\n";
  print $rule;
  print "HEADER:\n";
  print Dumper($site->header());
  print $rule;
  print "BODY:\n";
  print _text($site->body()) . "\n";
  print $rule;
}

Aufruf: Code: (dl
)
test_miniwget 'http://www.test.de/' oder: Code (perl): (dl
)
#!/usr/bin/perl
# Example: parse a fixed URL and load it as two explicit steps; the body is
# printed only when both steps report success.
use strict;
use warnings;
use miniwget;

my $page = miniwget->new();

print $page->body()
  if ($page->parse('http://www.test.de/') && $page->load());

oder: Code (perl): (dl
)
#!/usr/bin/perl
# Example: configure host and path through the accessors instead of a URL,
# then load the page and print its body if the request succeeded.
use strict;
use warnings;
use miniwget;

my $page = miniwget->new();
$page->host('www.test.de');
$page->path('/index.html');
$page->load();

if ($page->success())
{
  print $page->body();
}

Hier das Modul: Es hat nur Abhängigkeiten zu Coremodulen, macht keine Quotes der URL und kann nur "GET". Der URL-Parser schluckt nicht alles und manche falsche URL nimmt er. miniwget.pm Code (perl): (dl
)
package miniwget;

# miniwget - a minimal HTTP client that depends on core modules only.
#
# Limitations: plain "http://" URLs, GET requests only, no URL quoting,
# no redirects. A repeated response header overwrites the earlier value.

use strict;
use warnings;
use IO::Socket;

# Request template. load() fills the two %s placeholders with the path and
# the Host header value. "identity" is requested as content coding because
# this module never decompresses the body.
my $send_header=<<'EOH';
GET %s HTTP/1.1
Accept: text/html,*/*
Accept-Language: de-DE,de,de-de,en-us,en
Accept-Encoding: identity
User-Agent: Mozilla/5.0 (X11; U; Linux; cl-CL; rv:42) Gecko/2229082006 Fake/42 (Clingon)
Host: %s
Connection: Close
EOH

# get a website
# accepts a url like http://test.com/a/small/path?option=txt...
# $site=miniwget->new([$url])
# When $url is given (and true) the page is loaded immediately.
sub new
{
  my $class=shift;
  my $url=shift;
  my $self=bless({},$class);
  $self->_reset();
  $self->load($url) if($url);
  return $self;
}

# (private) put the per-request result fields into their "nothing loaded"
# state: empty header hash, empty body and message, status -1.
sub _reset
{
  my $self=shift;
  $self->{header}={};
  $self->{body}='';
  $self->{message}='';
  $self->{status}=-1;
  return $self;
}

# request success
# true when the status is a 2xx code
sub success
{
  my $stat=shift()->{status};
  return defined($stat) && $stat >= 200 && $stat < 300;
}

# get the header hash (reference; keys are lower-cased header names)
sub header
{
  return shift()->{header};
}

# get the body
sub body
{
  return shift->{body};
}

# get messages (HTTP reason phrase, or the module's own error text)
sub message
{
  return shift->{message};
}

# get statusnumber
# HTTP status code, -1 when nothing was received, or one of the module's
# own codes: '000' bad url, '001' no host, '002' connect failed,
# '003' line break in host or path.
sub status
{
  return shift->{status};
}

# get/set host
# With a true argument: stores it and returns the previous host.
# Without: returns the current host.
sub host
{
  my $self=shift;
  my $host=shift;
  return $self->{host} unless($host);

  my $old=$self->{host};
  $self->{host}=$host;
  return $old;
}

# get/set port
# With an argument > 0: stores it and returns the previous port.
# Without: returns the current port.
sub port
{
  my $self=shift;
  my $port=shift || 0;
  return $self->{port} unless($port > 0);

  my $old=$self->{port};
  $self->{port}=$port;
  return $old;
}

# get/set path
# With a true argument: stores it and returns the previous path.
# Without: returns the current path.
sub path
{
  my $self=shift;
  my $path=shift;
  return $self->{path} unless($path);

  my $old=$self->{path};
  $self->{path}=$path;
  return $old;
}

# load site
# $ok=$site->load([$url]);
# Returns 0 when no request could be made (see status()/message()), and 1
# once a response was read - even if the HTTP status signals an error;
# use success() to check for a 2xx status.
sub load
{
  my $self=shift;
  my $url=shift;

  $self->_reset();

  if($url && !$self->parse($url))
  {
    return $self->_fail('000','no url');
  }

  if(!$self->{host})
  {
    return $self->_fail('001','no host');
  }

  $self->{port}=80 unless($self->{port});
  $self->{path}='/' unless($self->{path});

  # A line break in host or path would end up as an extra line in the
  # request header, so refuse to send such a request.
  if("$self->{host}$self->{path}" =~ m/[\x0D\x0A]/)
  {
    return $self->_fail('003','invalid host or path');
  }

  my $socket=IO::Socket::INET->new(PeerAddr => $self->{host},
                                   PeerPort => $self->{port} );
  if(!$socket)
  {
    return $self->_fail('002','no connectinon to host');
  }
  binmode($socket);

  # create header; the Host field carries the port unless it is the default
  my $host_field=$self->{host};
  $host_field.=':'.$self->{port} if($self->{port} != 80);
  my $header=sprintf($send_header,$self->{path},$host_field);

  # cleanup header: every line ends with exactly one CRLF, and an empty
  # line terminates the request
  $header=~s/[\x0D\x0A]+/\x0D\x0A/g;
  $header.="\x0D\x0A";

  # send header:
  print {$socket} $header;

  $self->_read_head($socket);
  $self->{body}=_read_body($socket,$self->{header});

  close($socket);
  return 1;
}

# (private) record an error status/message and return 0
sub _fail
{
  my ($self,$status,$message)=@_;
  $self->{status}=$status;
  $self->{message}=$message;
  return 0;
}

# (private) read status line and header lines up to the first empty line
# and store them in status/message/header.
sub _read_head
{
  my ($self,$socket)=@_;
  local $/="\x0A";

  while(defined(my $line=<$socket>))
  {
    $line=~s/[\x0D\x0A]+$//;
    last if($line eq '');

    # Anchored at the start so a header *value* containing "HTTP/..."
    # cannot overwrite the status; the reason phrase is optional.
    if($line=~m!^\s*HTTP/[\d.]+\s+(\d+)(?:\s+(.*))?$!)
    {
      $self->{status}=$1;
      $self->{message}=defined($2) ? $2 : '';
    }
    elsif($line=~m!^\s*([\w-]+)\s*:\s*(.+)$!)
    {
      $self->{header}->{lc($1)}=$2;
    }
  }
  return;
}

# (private) read the response body from $socket according to the headers
# in the hash reference $header and return it as a string.
sub _read_body
{
  my ($socket,$header)=@_;

  # An HTTP/1.1 request may be answered with a chunked body.
  my $coding=$header->{'transfer-encoding'};
  if(defined($coding) && $coding=~m/\bchunked\b/i)
  {
    return _read_chunked($socket);
  }

  my $body='';
  my $length=$header->{'content-length'};
  if(defined($length) && $length=~m/^\s*(\d+)\s*$/)
  {
    my $wanted=$1;
    read($socket,$body,$wanted) if($wanted > 0);
    return defined($body) ? $body : '';
  }

  # no usable length: read all you get, in blocks
  my $buffer;
  while(read($socket,$buffer,8192))
  {
    $body.=$buffer;
  }
  return $body;
}

# (private) decode a "Transfer-Encoding: chunked" body: each chunk is a
# hexadecimal size line followed by that many bytes and a line break; a
# size of 0 ends the body. Trailer lines after the last chunk are ignored.
sub _read_chunked
{
  my $socket=shift;
  my $body='';
  local $/="\x0A";

  while(defined(my $size_line=<$socket>))
  {
    # the size may be followed by ";extension" - only the hex part counts
    last unless($size_line=~m/^\s*([0-9A-Fa-f]+)/);
    my $size=hex($1);
    last if($size == 0);

    my $chunk='';
    my $got=read($socket,$chunk,$size);
    last unless($got);
    $body.=$chunk;
    last if($got < $size);    # connection ended inside a chunk

    my $line_end=<$socket>;   # skip the line break that follows the data
  }
  return $body;
}

# parse url
# $ok=$site->parse($url);
# Accepts http://host[:port][/path][?query][#fragment]. The host may be a
# dotted name with a top-level domain of any length, a dot-less name such
# as "localhost", or a dotted number. The fragment is dropped because it is
# not part of a request. On success host/port/path are stored and 1 is
# returned; otherwise 0 is returned and the stored values stay untouched.
sub parse
{
  my $self=shift;
  my $url=shift || '';

  # tolerate surrounding whitespace, e.g. a line read without chomp
  $url=~s/^\s+//;
  $url=~s/\s+$//;

  return 0 unless($url=~m{^http://([-\w]+(?:\.[-\w]+)*)(?::(\d{1,5}))?([/?][^#\x0D\x0A]*)?(?:#.*)?$}i);
  my ($host,$port,$path)=($1,$2,$3);

  $port=80 unless($port);         # no port given (or ":0")
  return 0 if($port > 65535);
  $port+=0;                       # drop leading zeros

  $path='/' unless(defined($path) && $path ne '');
  $path='/'.$path if($path=~m/^\?/);   # "http://host?x=1" has an empty path

  $self->{host}=$host;
  $self->{port}=$port;
  $self->{path}=$path;
  return 1;
}

1;