PHP 爬虫 两篇转载

http://hi.baidu.com/xiaojiang/item/774af38966cf44ca98255ff0

<?php
/**
 * Small wrapper around the cURL extension for issuing GET and POST requests
 * with shared headers, a fixed user agent, optional cookie-file persistence,
 * response compression and an optional proxy.
 *
 * Call set_value() before get()/post() so that the user agent, compression
 * and cookie settings are populated.
 */
class CurlComponent
{
    /** @var array Request headers sent with every request. */
    public $headers = array();

    /** @var string User-Agent string sent with every request. */
    public $user_agent;

    /** @var string Value passed to CURLOPT_ENCODING (e.g. 'gzip'). */
    public $compression;

    /** @var string Path of the file used as both cookie source and cookie jar. */
    public $cookie_file;

    /** @var string Proxy passed to CURLOPT_PROXY; an empty value disables the proxy. */
    public $proxy;

    /** @var bool Whether cookies are persisted through $cookie_file. */
    public $cookies = false;

    /**
     * Configure the component.
     *
     * Headers are appended, so calling this method more than once adds the
     * default headers again.
     *
     * @param bool   $cookies     Enable cookie persistence.
     * @param string $cookie      Cookie file path (only used when $cookies is true).
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Proxy for CURLOPT_PROXY; '' means no proxy.
     */
    function set_value($cookies = TRUE, $cookie = 'cookies.txt', $compression = 'gzip', $proxy = '')
    {
        $this->headers[] = "Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg";
        $this->headers[] = "Connection: Keep-Alive";
        $this->headers[] = "Content-type: application/x-www-form-urlencoded";
        $this->user_agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0)";
        $this->compression = $compression;
        $this->proxy = $proxy;
        $this->cookies = $cookies;
        if ($this->cookies == TRUE) {
            $this->cookie($cookie);
        }
    }

    /**
     * Select the cookie file, creating it when it does not exist yet.
     *
     * Reports through error() (which terminates the script) when the file
     * cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    function cookie($cookie_file)
    {
        if (!file_exists($cookie_file)) {
            $handle = @fopen($cookie_file, 'w');
            if ($handle === false) {
                $this->error("The cookie file could not be opened. Make sure this directory has the correct permissions");
            }
            // Close the handle itself; the original passed the file name to
            // fclose(), which leaked the handle.
            fclose($handle);
        }
        $this->cookie_file = $cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url   URL to fetch.
     * @param string $refer Referer header value.
     * @return string|bool  Response body, or false when curl_exec() fails.
     */
    function get($url, $refer = '')
    {
        $process = $this->prepare($url, $refer);
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Perform a POST request, following redirects.
     *
     * @param string       $url   URL to post to.
     * @param string|array $data  Value passed to CURLOPT_POSTFIELDS.
     * @param string       $refer Referer header value.
     * @return string|bool        Response body, or false when curl_exec() fails.
     */
    function post($url, $data, $refer)
    {
        $process = $this->prepare($url, $refer);
        curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($process, CURLOPT_POST, 1);
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display.
     */
    function error($error)
    {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }

    /**
     * Create a cURL handle carrying the options shared by get() and post().
     *
     * @param string $url   Target URL.
     * @param string $refer Referer header value.
     * @return mixed        Handle returned by curl_init().
     */
    private function prepare($url, $refer)
    {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_REFERER, $refer);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) {
            // The same file is read for outgoing cookies and rewritten with
            // received cookies when the handle is closed.
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        // NOTE(review): CURLOPT_TIMEOUT is expressed in seconds, so 30000 is
        // more than eight hours. Value kept from the original; confirm whether
        // 30 was intended.
        curl_setopt($process, CURLOPT_TIMEOUT, 30000);
        if ($this->proxy) {
            // Apply the configured proxy to this handle (the original used an
            // undefined variable and a placeholder address).
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        return $process;
    }
}
 
?>



http://www.hdj.me/get-cookie-without-cookiejar-by-curl

PHP中CURL类是一个非常牛逼的工具类,具体怎么牛逼就不啰嗦了。
对于COOKIE,CURL类也有很不错的支持,但不够灵活:并不能通过现成的方法直接以变量的形式获取到,而是要通过以下方法实现。

// Persist cookies in cookie.txt: the same file is used as the cookie source
// (CURLOPT_COOKIEFILE) and as the cookie jar (CURLOPT_COOKIEJAR)
curl_setopt_array($ch, array(
    CURLOPT_COOKIEFILE => 'cookie.txt',
    CURLOPT_COOKIEJAR  => 'cookie.txt',
));


先把COOKIE保存到文件,调用的时候还得再读取文件,这意味着两次IO操作,效率如何,不用说大家都清楚了。
那么有没有办法可以绕过写读文件呢?不卖关子,直接上代码:

/**
 * Build a Cookie request-header value from the Set-Cookie lines of a raw
 * HTTP response header block.
 *
 * Only the leading "name=value" pair of each Set-Cookie line is kept;
 * attributes such as path/expires are dropped. When the same cookie name
 * appears more than once, the last occurrence wins.
 *
 * @param string $header Raw response headers (lines separated by CRLF).
 * @return string        "name=value; name2=value2", or '' when no cookie was set.
 */
function parse_set_cookie_headers($header)
{
    if (!preg_match_all('/^set-cookie:\s*([^;\r\n]+)/mi', $header, $found)) {
        return '';
    }
    $jar = array();
    foreach ($found[1] as $pair) {
        $pair = trim($pair);
        $eq = strpos($pair, '=');
        if ($eq === false || $eq === 0) {
            // Not a usable name=value pair
            continue;
        }
        $jar[substr($pair, 0, $eq)] = $pair;
    }
    return implode('; ', $jar);
}

// Initialise cURL
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Include the response headers in the returned data
curl_setopt($ch, CURLOPT_HEADER, 1);
// Return the raw response as a string instead of printing it
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Execute the request and fetch the result
$content = curl_exec($ch);
// The header size has to be read before the handle is closed
$header_size = ($content === false) ? 0 : (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
// Close cURL
curl_close($ch);
// Split the HTTP stream by header size rather than on the first blank line,
// so a body containing "\r\n\r\n" stays intact and extra header blocks
// (redirects, "100 Continue") all end up in $header
if ($content === false) {
    $header = '';
    $body = '';
} else {
    $header = rtrim((string) substr($content, 0, $header_size), "\r\n");
    $body = (string) substr($content, $header_size);
}
// Parse the cookies
// The result can be used directly in a later cURL request:
// curl_setopt($ch, CURLOPT_COOKIE, $cookie);
$cookie = parse_set_cookie_headers($header);


打完收工!欢迎大家来喷!





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值