PHP中用来获取网页的途径主要有三个:
1 file_get_contents 这个最简单,也最快,但不能构造http header头信息等
2 fsockopen
3 使用CURL库,
php 利用 curl 登录,并把cookie 值记录在该文件目录下的 .tmp 文件供下面使用;
等登录后,就可以获得网页的信息。curl 的参数可以自己设置(参考文档)。
以下 curl 代码经测试可正常运行,fsockopen 请自己测试,如有误请各位指正留言。
代码如下:
<?php
// Log in to a site with cURL, keep the session cookies in a file, then fetch
// a page that requires the logged-in session and print it as plain text.
// The URLs and credentials below are examples; replace them with real values.
$login_url = 'http://www.xxx.com/login.php';
$login_request = 'user=username&pass=123456&submit=Login';
$after_login_url = 'http://www.xxx.com/info.php?campaign=43921&program=3&show_results=1&sub=0&site=0';

// Create the cookie file in the directory that contains this script.
// dirname(__FILE__) is used instead of a hand-edited placeholder path so that
// tempnam() does not silently fall back to the system temp directory.
$cookie_jar = tempnam(dirname(__FILE__), 'cookie');
if ($cookie_jar === false) {
    die('Unable to create the cookie file');
}

// Step 1: submit the login form.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $login_url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $login_request);
// Save the cookies returned by the server into the $cookie_jar file.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
// Return the response as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Do not include the response headers in the output.
curl_setopt($ch, CURLOPT_HEADER, false);
// Do fetch the response body (false means "not a body-less request").
curl_setopt($ch, CURLOPT_NOBODY, false);
if (curl_exec($ch) === false) {
    $login_error = curl_error($ch);
    curl_close($ch);
    die('Login request failed: ' . $login_error);
}
// The cookie jar is written when the handle is released. curl_close() does
// that on PHP < 8.0; on PHP >= 8.0 it has no effect, so the handle is also
// unset to make sure the cookies are on disk before the next request.
curl_close($ch);
unset($ch);

// Step 2: fetch the protected page, sending the cookies saved during login.
$ch2 = curl_init();
curl_setopt($ch2, CURLOPT_URL, $after_login_url);
curl_setopt($ch2, CURLOPT_HEADER, false);
curl_setopt($ch2, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch2, CURLOPT_COOKIEFILE, $cookie_jar);
$orders = curl_exec($ch2);
if ($orders === false) {
    $fetch_error = curl_error($ch2);
    curl_close($ch2);
    die('Request after login failed: ' . $fetch_error);
}
curl_close($ch2);

// Print the page with its HTML tags removed.
echo '<pre>';
echo strip_tags($orders);
echo '</pre>';
?>
方法2用fsockopen:
<?php
/**
 * Send a raw HTTP request over a socket (port 80) and return the parsed reply.
 *
 * @param string $host   Host name to connect to; also sent as the Host header.
 * @param string $method Full request line, e.g. "POST /login.php HTTP/1.0".
 * @param string $str    URL-encoded request body; only sent for POST requests.
 * @param string $sessid Optional PHP session id, sent as the PHPSESSID cookie.
 *
 * @return array|null Array with key 'content' (response body) and, when found
 *                    in the response headers, 'sessid' and 'location';
 *                    null when the connection could not be opened.
 */
function GetWebContent($host, $method, $str, $sessid = '')
{
    $ip = gethostbyname($host);
    // Connection failures are reported through the null return value, so the
    // warning fsockopen() would emit is suppressed on purpose.
    $fp = @fsockopen($ip, 80);
    if (!$fp) {
        return;
    }

    $requestLine = trim($method);
    $isPost = (substr($requestLine, 0, 4) == "POST");

    // Every header line must end with CRLF, and an empty line ends the headers.
    $request = $requestLine . "\r\n";
    $request .= "Host: $host\r\n";
    if (!empty($sessid)) {
        // Only name=value pairs belong in a request Cookie header; attributes
        // such as "path" are valid in Set-Cookie responses only.
        $request .= "Cookie: PHPSESSID=$sessid\r\n";
    }
    if ($isPost) {
        $request .= "Content-Type: application/x-www-form-urlencoded\r\n";
        // The length must match the body exactly, so nothing is appended to $str.
        $request .= "Content-Length: " . strlen($str) . "\r\n";
    }
    $request .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1) )\r\n";
    // The response is read until EOF, so ask the server to close the
    // connection; a keep-alive connection would block until it times out.
    $request .= "Connection: close\r\n";
    $request .= "\r\n";
    if ($isPost) {
        $request .= $str;
    }
    fputs($fp, $request);

    $response = '';
    while (!feof($fp)) {
        $line = fgets($fp);
        if ($line === false) {
            break;
        }
        $response .= $line;
    }
    fclose($fp);

    // Headers and body are separated by the first empty line. Bare LF line
    // endings are accepted as well as CRLF.
    $parts = preg_split('/\r?\n\r?\n/', $response, 2);
    $header = $parts[0];
    $entity = isset($parts[1]) ? $parts[1] : '';

    $a = array();
    // The session id may be the last item of a Set-Cookie header, so no
    // trailing ";" is required.
    if (preg_match('/PHPSESSID=([0-9a-z,-]+)/i', $header, $matches)) {
        $a['sessid'] = $matches[1];
    }
    // Capture the whole redirect target, including "/", ":" and "-".
    if (preg_match('/^Location:\s*(\S+)/im', $header, $matches)) {
        $a['location'] = $matches[1];
    }
    $a['content'] = $entity;
    return $a;
}
// Example use of GetWebContent(): log in, then request a page with the session.
// NOTE: $host, $login_page, $str and $somepage are not defined in this snippet;
// set them to the target host, login page, URL-encoded login form data and the
// page to fetch before running it.
$response = GetWebContent($host, "POST /$login_page HTTP/1.0", $str); // log in to obtain a new session id
// ... the session id can be saved here for later use
$sessid = (is_array($response) && isset($response['sessid'])) ? $response['sessid'] : '';
$response = GetWebContent($host, "GET /$somepage HTTP/1.0", '', $sessid); // request the page using the session id
if (is_array($response)) {
    // 'location' is only present when the server answered with a redirect.
    $location = isset($response['location']) ? $response['location'] : '';
    echo $location . $response['content'] . "<br>";
} else {
    // GetWebContent() returns null when the connection could not be opened.
    echo "<br>";
}
?>
<?php
/*
How to simulate a user logging in to Yahoo Space (http://i.cn.yahoo.com/) with
a PHP program. "Simulating a user login" means writing a PHP program that
performs the same login steps a user would.
*/
// Prints "succeed" or "error" after the login attempt; see loginYahoo() for
// what is actually checked.
loginYahoo('cnphpd@yahoo.com','******');

/**
 * Post the credentials to the Yahoo login endpoint, then request the
 * personal page with the cookies received during login.
 *
 * Output: echoes the login response body, lets cURL print the personal page,
 * then echoes 'succeed' when that second transfer completed or 'error' when
 * it failed. Terminates the script with the cURL error message when the
 * login request itself fails.
 *
 * NOTE(review): 'succeed' only means the second HTTP transfer completed; the
 * page content is not inspected, so it does not prove the credentials were
 * accepted. Confirm against the site's real success/failure pages.
 *
 * @param string $user Login name.
 * @param string $pass Password.
 */
function loginYahoo($user, $pass)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "https://edit.bjs.yahoo.com/config/login");
    curl_setopt($ch, CURLOPT_POST, 1);
    // The password travels over this connection, so the server certificate
    // must be verified.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    // An empty file name loads no cookies but enables cookie handling, so the
    // cookies set by the login response are sent with the next request made
    // on this same handle.
    curl_setopt($ch, CURLOPT_COOKIEFILE, '');
    // URL-encode the values so characters such as "&", "=" or "+" in the
    // credentials cannot corrupt the form data.
    curl_setopt($ch, CURLOPT_POSTFIELDS, 'login=' . urlencode($user) . '&passwd=' . urlencode($pass) . '&.persistent=y');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $result = curl_exec($ch);
    // Compare with false explicitly: an empty or "0" body is not a failure.
    if ($result === false) {
        $error = curl_error($ch);
        curl_close($ch);
        die($error);
    }
    echo $result;

    // Request the personal page on the same handle so the login cookies are
    // reused. The body is printed directly by cURL and curl_exec() returns
    // true or false for the transfer.
    curl_setopt($ch, CURLOPT_URL, "http://i.cn.yahoo.com/my.html?.login=1");
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
    $result2 = curl_exec($ch);
    curl_close($ch);
    if ($result2 === true) {
        echo 'succeed';
    } else {
        echo 'error';
    }
}
?>
https://app.cheetahmail.com/cgi-bin/mailers/authen/login.cgi