几天没写了,主要都是自己的学习过程。贴一下 curl / curl_multi_exec 的一些代码,mark 一下。
<?php
/**
 * Created by PhpStorm.
 * User: f3ngt1ng
 * Date: 2017/2/23
 * Time: 10:46
 */

// Practising curl_multi_exec: a simple concurrent crawler that can go through proxies.

/**
 * Fetch a single URL with a blocking cURL request, optionally through a proxy.
 *
 * @param string      $url   URL to request.
 * @param string|null $proxy Proxy passed to CURLOPT_PROXY (e.g. "host:port");
 *                           null leaves the proxy option untouched.
 * @param array       $auth  Proxy credentials; the elements are joined with ':'
 *                           and passed to CURLOPT_PROXYUSERPWD when non-empty.
 *
 * @return string|bool The value returned by curl_exec(): the response body,
 *                     or false when the transfer fails.
 */
function curl_crawl($url, $proxy = null, $auth = array())
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Request headers belong in CURLOPT_HTTPHEADER. CURLOPT_HEADER is the
    // boolean switch that prepends the *response* headers to the output.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Connection: close'));

    if (isset($proxy)) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }
    if (!empty($auth)) {
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, join(':', $auth));
    }

    $content = curl_exec($ch);
    curl_close($ch);

    return $content;
}

/*
 * Example:
 *   //$proxy = '200.255.220.211:8080';
 *   $url = 'http://demo.com:8080/2.23/server.php';
 *   $content = curl_crawl($url);
 *   echo $content;
 */

/**
 * Fetch several URLs concurrently through curl_multi, dump the collected
 * bodies with var_dump() and return them.
 *
 * @param array $url     URLs to fetch. A URL listed more than once is requested
 *                       only once, because results are keyed by URL.
 * @param array $proxy   Pool of proxies; one entry is picked at random for each
 *                       request. An empty array leaves the proxy option untouched.
 * @param array $auth    Proxy credentials; the elements are joined with ':' and
 *                       applied to every request when non-empty.
 * @param int   $threads Only the value 1 (one request per URL) is handled; for
 *                       any other value no request is scheduled.
 *
 * @return array Map of URL => value of curl_multi_getcontent() for that URL.
 */
function curl_multi_crawl($url = array(), $proxy = array(), $auth = array(), $threads = 1)
{
    $mul = curl_multi_init();
    $curl_handlers = array();
    $results = array();

    // One request per URL (i.e. not the "single URL, several threads" case).
    if ($threads === 1) {
        foreach ($url as $t) {
            // Handles are keyed by URL: adding the same URL twice would
            // overwrite the stored handle and leave the first one attached to
            // the multi handle without ever being removed or read.
            if (isset($curl_handlers[$t])) {
                continue;
            }

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $t);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 5);

            if (!empty($proxy)) {
                // array_rand() works for any key layout, not only 0-based lists.
                curl_setopt($ch, CURLOPT_PROXY, $proxy[array_rand($proxy)]);
            }
            if (!empty($auth)) {
                curl_setopt($ch, CURLOPT_PROXYUSERPWD, join(':', $auth));
            }

            $curl_handlers[$t] = $ch;
            curl_multi_add_handle($mul, $ch);
        }
    }

    // Start the transfers.
    $active = null;
    do {
        $mrc = curl_multi_exec($mul, $active);
    } while ($mrc === CURLM_CALL_MULTI_PERFORM);

    // Drive the transfers until none is active or an error is reported.
    while ($active && $mrc === CURLM_OK) {
        // When curl_multi_select() reports -1, pause briefly and still call
        // curl_multi_exec(); skipping the exec call would spin forever.
        if (curl_multi_select($mul) === -1) {
            usleep(100);
        }
        do {
            $mrc = curl_multi_exec($mul, $active);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM);
    }

    // Collect the bodies and release every easy handle.
    foreach ($curl_handlers as $target => $handler) {
        $results[$target] = curl_multi_getcontent($handler);
        curl_multi_remove_handle($mul, $handler);
        curl_close($handler);
    }
    curl_multi_close($mul);

    // Kept for backward compatibility: callers so far only saw this dump.
    var_dump($results);

    return $results;
}

/*
 * Example:
 *   $proxy = array('127.0.0.1:8888');
 *   $url = array('http://demo.com:8080/2.23/server.php', 'http://www.baidu.com');
 *   curl_multi_crawl($url, $proxy);
 */