为了搞科研,最近用PHP写了个爬虫抓取图像,要交给实验室的同学用。苦于运行环境需要安装一大堆软件,还要小心配置,实在麻烦,于是向做PHP的同学取经,对方推荐了Windows下的PHP集成发布软件APMServ:稳定版本是5.2.6,对应的PHP版本也是5.2.6,完全是傻瓜式的一键配置。
为了不喧宾夺主,直接给出官方网站: APMServ5.2.6。
说说我的PHP学习感受:PHP的语法和C非常像,“会写C就会写PHP”,此言不虚。另外,PHP的变量前面都有一个$符号,每次都让我觉得PHP的发明人是不是很缺钱,搞个货币符号提醒自己多写点代码,这样就可以发工资了。
分享一个PHP Spider的代码:
<?php
/**
 * Small cURL-based HTTP client with a fluent configuration API.
 *
 * Configure the request through the chainable setters (url(), refer(),
 * SetCookies(), ...) and then call methodGet() or methodPost(). Cookies
 * announced by the server through Set-Cookie headers are merged into
 * $webcookies after every successful request, so consecutive requests on the
 * same object share a session.
 *
 * The code deliberately sticks to old-style syntax (array(), no type
 * declarations) so that it keeps running on the legacy PHP 5 installations
 * this file was written for, as well as on current PHP versions.
 */
class spider {
    public $url;          // Target URL of the request.
    public $webcookies;   // Cookie string ("name=value;name2=value2") sent with and updated by requests.
    public $refer;        // Referer header; when empty, one is derived from $url.
    public $PROXY = "";   // Proxy as "host:port"; empty string disables the proxy.
    public $encoding;     // Source encoding of the response; converted to UTF-8 when set and not UTF-8.
    public $cmod;         // 'new' starts a new cookie session instead of sending $webcookies.
    public $jmod;         // 'jump' follows HTTP redirects.
    public $hmod;         // 'noheader' omits the response headers from the returned contents.
    public $type;         // 'bin' skips encoding conversion (binary payloads such as images).
    public $postfield;    // Data submitted by methodPost().
    public $time = 30;    // Request timeout in seconds.
    public $contents;     // Result of the last request (false when the transfer failed).

    /**
     * Performs an HTTP GET request with the current configuration.
     *
     * @return string|false Response (headers included unless hmod is
     *                      'noheader'), or FALSE when the transfer failed or
     *                      returned nothing.
     */
    public function methodGet() {
        $ch = $this->createHandle();
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        return $this->execute($ch);
    }

    /**
     * Performs an HTTP POST request, submitting $postfield.
     *
     * @return string|false Response (headers included unless hmod is
     *                      'noheader'), or FALSE when the transfer failed or
     *                      returned nothing.
     */
    public function methodPost() {
        $ch = $this->createHandle();
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $this->postfield);
        return $this->execute($ch);
    }

    /**
     * Creates a cURL handle carrying every option shared by GET and POST.
     *
     * @return resource|object cURL handle (a resource before PHP 8, a
     *                         CurlHandle object afterwards).
     */
    private function createHandle() {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->time);
        if ($this->PROXY != "") {
            curl_setopt($ch, CURLOPT_PROXY, "http://" . $this->PROXY);
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Keep-Alive: 300', 'Connection: keep-alive'));
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; GTB7.4; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2)');
        // NOTE(review): TLS peer verification is switched off, which allows
        // man-in-the-middle attacks. Kept as-is for backward compatibility;
        // enable verification before using this against sensitive endpoints.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        if ($this->jmod === 'jump') {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        }
        $referer = $this->resolveReferer();
        if ($referer !== '') {
            curl_setopt($ch, CURLOPT_REFERER, $referer);
        }
        curl_setopt($ch, CURLOPT_URL, $this->url);
        if ($this->cmod === 'new') {
            // Start a fresh cookie session; stored cookies are not sent.
            curl_setopt($ch, CURLOPT_COOKIESESSION, 1);
        } elseif (!empty($this->webcookies)) {
            curl_setopt($ch, CURLOPT_COOKIE, $this->webcookies);
        }
        curl_setopt($ch, CURLOPT_HEADER, $this->hmod === 'noheader' ? 0 : 1);
        return $ch;
    }

    /**
     * Returns the Referer to send: the explicit $refer when set, otherwise the
     * site root ("scheme://host[:port]/") of $url.
     *
     * parse_url() returns an array, so its result must never be handed to
     * CURLOPT_REFERER directly.
     *
     * @return string Referer value, or '' when none can be derived.
     */
    private function resolveReferer() {
        if (!empty($this->refer)) {
            return $this->refer;
        }
        $parts = parse_url((string) $this->url);
        if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
            return '';
        }
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (!empty($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
        return $origin . '/';
    }

    /**
     * Runs the prepared request, stores the result in $contents, merges
     * received cookies into $webcookies and converts the text to UTF-8.
     *
     * @param resource|object $ch Handle returned by createHandle().
     * @return string|false Response contents, or FALSE on failure/empty reply.
     */
    private function execute($ch) {
        $raw = curl_exec($ch);
        // The header size must be read before the handle is closed. It is only
        // meaningful when headers are part of the returned data.
        $headerSize = 0;
        if ($raw !== false && $this->hmod !== 'noheader') {
            $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        }
        // curl_close() does nothing since PHP 8.0 and is deprecated later on;
        // older versions still need it to release the handle.
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            curl_close($ch);
        }

        $this->contents = $raw;
        if ($raw === false || $raw === '') {
            return FALSE;
        }

        // Look for cookies in the raw header bytes only, so that body text
        // which happens to contain "Set-Cookie:" cannot pollute the jar.
        $headerText = $headerSize > 0 ? substr($raw, 0, $headerSize) : '';
        $this->mergeCookies($headerText);

        if ($this->type != 'bin' && $this->needsConversion()) {
            $converted = mb_convert_encoding($raw, "UTF-8", $this->encoding);
            if ($converted !== false) {
                $this->contents = $converted;
            }
        }
        return $this->contents;
    }

    /**
     * Tells whether the response has to be transcoded: only when a source
     * encoding is configured and it is not already UTF-8. An empty encoding
     * must not reach mb_convert_encoding(), which rejects it on PHP 8.
     *
     * @return bool
     */
    private function needsConversion() {
        if (!is_string($this->encoding) || trim($this->encoding) === '') {
            return false;
        }
        return strcasecmp(trim($this->encoding), 'UTF-8') !== 0;
    }

    /**
     * Merges the cookies from the given response headers into $webcookies.
     *
     * Existing cookies keep their position; a cookie received again has its
     * value replaced. Only the leading "name=value" part of each Set-Cookie
     * header is kept (attributes such as Path or Expires are dropped).
     *
     * @param string $headerText Raw response header block(s); may be ''.
     * @return void
     */
    private function mergeCookies($headerText) {
        $pairs = empty($this->webcookies) ? array() : explode(';', $this->webcookies);
        if (preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/im', $headerText, $found)) {
            $pairs = array_merge($pairs, $found[1]);
        }

        $jar = array();
        foreach ($pairs as $pair) {
            // Trim so that "a=1; b=2" and "a=1;b=2" address the same cookie.
            $pair = trim($pair);
            if ($pair === '') {
                continue;
            }
            $parts = explode('=', $pair, 2);
            $name = trim($parts[0]);
            if ($name === '') {
                continue;
            }
            // A cookie without '=' is stored with an empty value.
            $jar[$name] = isset($parts[1]) ? $parts[1] : '';
        }

        $joined = array();
        foreach ($jar as $name => $value) {
            $joined[] = $name . '=' . $value;
        }
        $this->webcookies = implode(';', $joined);
    }

    /** Sets the target URL. Returns $this for chaining. */
    public function url($url) {
        $this->url = $url;
        return $this;
    }

    /** Sets the cookie string sent with requests. Returns $this for chaining. */
    public function SetCookies($cookies) {
        $this->webcookies = $cookies;
        return $this;
    }

    /** Sets the proxy ("host:port"). Returns $this for chaining. */
    public function ip($ip) {
        $this->PROXY = $ip;
        return $this;
    }

    /** Sets the source encoding of the response. Returns $this for chaining. */
    public function code($code) {
        $this->encoding = $code;
        return $this;
    }

    /** Sets the cookie mode; 'new' starts a new session. Returns $this for chaining. */
    public function cmod($cmod) {
        $this->cmod = $cmod;
        return $this;
    }

    /** Sets the redirect mode; 'jump' follows redirects. Returns $this for chaining. */
    public function jmod($jmod) {
        $this->jmod = $jmod;
        return $this;
    }

    /** Sets the header mode; 'noheader' omits response headers. Returns $this for chaining. */
    public function hmod($hmod) {
        $this->hmod = $hmod;
        return $this;
    }

    /** Sets the payload type; 'bin' disables encoding conversion. Returns $this for chaining. */
    public function type($type) {
        $this->type = $type;
        return $this;
    }

    /** Sets the data submitted by methodPost(). Returns $this for chaining. */
    public function post($postdata) {
        $this->postfield = $postdata;
        return $this;
    }

    /** Sets the request timeout in seconds. Returns $this for chaining. */
    public function buytime($time) {
        $this->time = $time;
        return $this;
    }

    /** Sets an explicit Referer header. Returns $this for chaining. */
    public function refer($refer) {
        $this->refer = $refer;
        return $this;
    }

    /**
     * Resets every setting to its blank state (timeout back to 30 seconds) so
     * the object can be reused for an unrelated request.
     */
    public function clear() {
        $this->url = "";
        $this->webcookies = "";
        $this->refer = "";
        $this->PROXY = "";
        $this->encoding = "";
        $this->cmod = "";
        $this->jmod = "";
        $this->hmod = "";
        $this->type = "";
        $this->postfield = "";
        $this->time = 30;
        $this->contents = "";
    }
}
?>