Using CURL Multi in Perl for Single-Process, Multi-Connection Web Access
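The UrlFetcher module below drives many HTTP requests from a single process by pooling WWW::Curl::Easy handles behind the WWW::Curl::Multi interface. It pre-creates MAX_CONN easy handles, asks a user-supplied CB_URL callback for the next request whenever a handle becomes idle, and hands every finished response (curl return code, HTTP status code and body) to a CB_RET callback; when no new request can be queued, a CB_WAIT callback lets the caller throttle the loop. A usage example is included in the POD after __END__.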
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Curl::Easy;
use WWW::Curl::Multi;

package UrlFetcher;

# Constructor: fill in defaults and pre-create a pool of MAX_CONN easy handles.
sub new {
    my $class  = shift;
    my $option = shift || {};
    $option->{MAX_CONN}        = 2       unless defined($option->{MAX_CONN});
    $option->{CONNECT_TIMEOUT} = 15      unless defined($option->{CONNECT_TIMEOUT});
    $option->{READ_TIMEOUT}    = 25      unless defined($option->{READ_TIMEOUT});
    $option->{CB_URL}          = sub {}  unless defined($option->{CB_URL});
    $option->{CB_RET}          = sub {}  unless defined($option->{CB_RET});
    $option->{CB_WAIT}         = sub { sleep(1) } unless defined($option->{CB_WAIT});

    my $curls = {};
    foreach my $i (1 .. $option->{MAX_CONN}) {
        $curls->{$i} = WWW::Curl::Easy->new();
    }

    return bless {
        OPTION     => $option,
        CURLM      => WWW::Curl::Multi->new(),
        IDLE_CURLS => $curls,   # handles waiting for work, keyed by pool id
        BUSY_CURLS => {},       # handles with a transfer in flight, keyed by pool id
    }, $class;
}

sub DESTROY {
    my $pkg = shift;
    # Release all easy handles and the multi handle.
    foreach my $i (keys(%{$pkg->{IDLE_CURLS}})) {
        delete $pkg->{IDLE_CURLS}->{$i};
    }
    foreach my $i (keys(%{$pkg->{BUSY_CURLS}})) {
        delete $pkg->{BUSY_CURLS}->{$i};
    }
    delete $pkg->{CURLM};
}

# Pull requests from CB_URL and attach them to idle easy handles.
# Returns the number of handles newly added to the multi handle.
sub _prepareCurl {
    my $pkg = shift;
    my $active_handles = 0;
    foreach my $i (keys(%{$pkg->{IDLE_CURLS}})) {
        my $req = $pkg->{OPTION}->{CB_URL}->();
        if (defined($req) && $req) {
            my $curl = $pkg->{IDLE_CURLS}->{$i};
            delete $pkg->{IDLE_CURLS}->{$i};

            my $data = { CURL => $curl, REQ => $req };
            $data->{REQ}->{HTTP_CODE} = 0;
            $data->{REQ}->{HTTP_BODY} = '';

            # Write the response body into the in-memory scalar of the request.
            open(my $fileb, ">", \$req->{HTTP_BODY});
            $curl->setopt(WWW::Curl::Easy::CURLOPT_WRITEDATA, $fileb);
            $curl->setopt(WWW::Curl::Easy::CURLOPT_HEADER, 0);
            $curl->setopt(WWW::Curl::Easy::CURLOPT_CONNECTTIMEOUT, $pkg->{OPTION}->{CONNECT_TIMEOUT});
            $curl->setopt(WWW::Curl::Easy::CURLOPT_TIMEOUT, $pkg->{OPTION}->{READ_TIMEOUT});
            $curl->setopt(WWW::Curl::Easy::CURLOPT_URL, $req->{HTTP_URL});
            # The pool id travels with the handle so info_read() can map the
            # finished transfer back to its request.
            $curl->setopt(WWW::Curl::Easy::CURLOPT_PRIVATE, $i);

            $pkg->{BUSY_CURLS}->{$i} = $data;
            $pkg->{CURLM}->add_handle($curl);
            $active_handles++;
        } else {
            # CB_URL has nothing more to fetch right now.
            last;
        }
    }
    return $active_handles;
}

sub __processResult {
    my $pkg  = shift;
    my $data = shift;
    $pkg->{OPTION}->{CB_RET}->($data);
}

# Main loop: drive all transfers from this single process, harvest finished
# handles, hand their results to CB_RET and return the handles to the pool.
sub perform {
    my $pkg = shift;
    my $active_handles  = 0;
    my $pending_handles = $pkg->_prepareCurl();
    $active_handles += $pending_handles;

    while ($active_handles) {
        my $active_transfers = $pkg->{CURLM}->perform();
        if ($active_transfers != $active_handles) {
            # At least one transfer finished; collect the results.
            while (my ($id, $return_value) = $pkg->{CURLM}->info_read()) {
                if ($id) {
                    $active_handles--;
                    my $data = $pkg->{BUSY_CURLS}->{$id};
                    delete $pkg->{BUSY_CURLS}->{$id};
                    $data->{REQ}->{RET}       = $return_value;
                    $data->{REQ}->{HTTP_CODE} = $data->{CURL}->getinfo(WWW::Curl::Easy::CURLINFO_HTTP_CODE);
                    $pkg->__processResult($data->{REQ});
                    $pkg->{IDLE_CURLS}->{$id} = $data->{CURL};
                }
            }
        }

        # Refill the pool; if nothing new was queued, let the caller throttle the loop.
        $pending_handles = $pkg->_prepareCurl();
        $pkg->{OPTION}->{CB_WAIT}->() unless ($pending_handles);
        $active_handles += $pending_handles;

        # Sanity check: every easy handle must be either idle or busy.
        die("easy curl handle pool is corrupted")
            unless ($pkg->{OPTION}->{MAX_CONN} ==
                    scalar(keys(%{$pkg->{IDLE_CURLS}})) + scalar(keys(%{$pkg->{BUSY_CURLS}})));
    }
}

1;

__END__

=head1 DOCUMENTATION

    use UrlFetcher;

    my $i = 0;

    # Called whenever an easy handle is idle; return the next request as a
    # hashref (HTTP_URL plus any fields you want back in cb_ret), or undef
    # when there is nothing to fetch.
    sub cb_url {
        my $ret = undef;
        $i++;
        $ret = {
            HTTP_URL => 'http://192.168.2.150/lht/lht.txt?idx=' . $i,
            IDX      => $i,
            MY_VALUE => '000',
        };
        sleep(5) unless ($i % 20);
        return $ret;
    }

    # Called once per finished transfer with the request hashref, now
    # extended with RET (curl return code), HTTP_CODE and HTTP_BODY.
    sub cb_ret {
        my $d = shift;
        print 'INDEX: ' . $d->{IDX} . "\n";
        print 'MY VALUE: ' . $d->{MY_VALUE} . "\n";
        print 'RET CODE: ' . $d->{RET} . "\n";
        if ($d->{RET}) {
            print "RET MSG: BAD\n";
        } else {
            print "RET MSG: OK\n";
        }
        print 'HTTP CODE: ' . $d->{HTTP_CODE} . "\n";
        print 'HTTP BODY: ' . $d->{HTTP_BODY} . "\n";
    }

    # Called when no new request could be queued, to keep the loop from spinning.
    sub cb_wait {
        sleep(1);
    }

    my $opt = {
        MAX_CONN        => 10,
        CONNECT_TIMEOUT => 25,
        READ_TIMEOUT    => 35,
        CB_URL          => \&cb_url,
        CB_RET          => \&cb_ret,
        CB_WAIT         => \&cb_wait,
    };
    my $fetcher = UrlFetcher->new($opt);
    $fetcher->perform();

=cut