本文以抓取环球网新闻为例,如要抓取其他网站的新闻,可自行更改,方法相同。
<?php
// Scrape the Huanqiu "regions" news listing: connect to the database, download
// the listing page, cut out the picture-list markup and extract one row of
// fields per <li class="item"> entry.

// NOTE(review): the mysql_* extension was removed in PHP 7. The calls are kept
// as-is because code further down the file may rely on this legacy connection;
// consider migrating the whole script to mysqli or PDO.
$link = mysql_connect("localhost", "root", "");
mysql_select_db("blog");

// Download the listing page. file_get_contents() returns false on failure;
// normalise that to an empty string so the parsing below simply finds no items
// instead of feeding a boolean into explode().
$str = file_get_contents("http://world.huanqiu.com/regions/");
if ($str === false) {
    $str = '';
}

// Keep only the markup between the opening list marker and the first </ul>.
// If the marker is absent there is no element 1, so fall back to an empty
// string rather than reading an undefined offset.
$content = explode('<ul class="listPicBox">', $str);
$content = explode('</ul>', isset($content[1]) ? $content[1] : '');

// One match per list item. The capture-group numbers are relied on below, so
// the pattern must not gain or lose groups.
$pat = '/<li class="item">\s*<a href="(.*?)"(.*?)><img src="(.*?)"(.*?)(.*?)<\/a>\s*<h3><a href="(.*?)"(.*?)>(.*?)<\/a><\/h3>\s*<h5>(.*?)\s*<em>(.*?)<\/em>(.*)<\/h5>\s*<h6><span><\/span>(.*?)<\/h6>\s*<\/li>/i';
preg_match_all($pat, $content[0], $m);

// Column views over the matches (default PREG_PATTERN_ORDER layout):
$picurl = $m[3];     // group 3: src of the thumbnail <img>
$sourceurl = $m[1];  // group 1: href of the first <a> in the item
$title = $m[8];      // group 8: text of the <h3> link
$subtitle = $m[9];   // group 9: leading text of the <h5>, before <em>

// Counters for the per-item loop that follows.
$i = 0;
$count = count($m[1]);
while ($i<$count)
{
$main=array();
$con = file_get_contents($sourceurl[$i]