抓取京东,淘宝等商品信息。
以下是本人在项目中碰到的,拿出来和大家分享一下,如果转载请标明转载来源,谢谢!
以下的代码本人已经注释得很明确了,如果还有不懂的地方请加我QQ:71124324,下面把代码呈上:
/**
 * Scrape product information from a Paipai (wanggou.com) item page.
 *
 * The page is fetched through self::getHTML(), converted from GBK to UTF-8
 * and then mined with regular expressions. The returned array always has the
 * same keys; a text field that cannot be located is an empty string, freight
 * falls back to 0 and detail_url falls back to the URL that was passed in.
 *
 * @param string $url URL of the item page to scrape.
 * @return array Product data with the keys, in this order: iid, status
 *               (1 when an item ID was found, otherwise 0), approve_status
 *               ('onsale' or 'stockout'), detail_url, title, imagePhoto,
 *               price, freight, shopurl, shopname, SellerCredit, num,
 *               stockString, shopType, shopWeb.
 */
function paipaiItem($url){
    // Sample item pages kept for manual testing:
    //$html = self::getHTML("http://item.wanggou.com/2865F632000000000401000031513F7F",3600); // free shipping
    //$html = self::getHTML("http://item.wanggou.com/C250F6320000000000423AE807A347E0",3600); // shipping charged

    // NOTE(review): 3600 is passed straight through to getHTML(); presumably a
    // cache lifetime in seconds - confirm against getHTML().
    $raw = self::getHTML($url, 3600);
    if (!is_string($raw)) {
        // getHTML()'s failure value is not visible here; anything that is not
        // a string is treated as an empty page so every field comes back empty.
        $raw = '';
    }

    // Character-set conversion (GBK -> UTF-8).
    $html = $raw === '' ? '' : iconv("gbk", "UTF-8//IGNORE", $raw);
    if ($html === false) {
        // iconv() returns false on failure, and on some platforms it still
        // fails on malformed input despite //IGNORE. Without a fallback a
        // single bad byte would make every pattern below miss and the product
        // would be reported as 'stockout'.
        $html = function_exists('mb_convert_encoding')
            ? mb_convert_encoding($raw, 'UTF-8', 'GBK')
            : '';
    }
    if (!is_string($html)) {
        $html = '';
    }

    // The Paipai page also embeds its data in "var pageMess=" (a page data
    // island); that may become useful later.
    preg_match('/<input type="hidden" name="sTitle" value="(.*?)" \/>/i', $html, $name);
    preg_match('/<input type="hidden" id="itemid" name="itemid" value="(.*?)"/i', $html, $iid);
    preg_match('/<img class="pic_master".*?src="(.*?)"/i', $html, $pic);
    preg_match('/<div id="pfhlkd_picshower">.*?<img src="(.*?)"/is', $html, $pic2);
    preg_match('/<em id="commodityCurrentPrice" defaultVal="([0-9.]+?)">/i', $html, $price); // promotional price
    preg_match('/<input type="hidden" name="Price" value="([0-9.]+?)" \/>/i', $html, $price2); // original price
    preg_match('/<input type="hidden" name="SellerCredit" value="(.*?)" \/>/i', $html, $SellerCredit);
    preg_match('/<em id="pfhlkd_shipCost".*?info="\|(.*?)\|/i', $html, $freight);
    preg_match('/<a id="shop_name_anchor" href="(.*?)" title="(.*?)">/i', $html, $shopname);
    preg_match('/<em id="currentStockNum">(.*?)<\/em>/i', $html, $StockNum);
    preg_match('/<input type="hidden" name="stockString" id="stockString" value="(.*?)"/i', $html, $stockString);

    // Trimmed capture group $index of a preg_match() result, or '' when the
    // pattern did not match.
    $pick = function ($match, $index = 1) {
        return isset($match[$index]) ? trim($match[$index]) : '';
    };

    $itemId = $pick($iid);
    $found = $itemId != '';
    $freightText = $pick($freight);

    $info = array();
    $info['iid'] = $itemId; // product ID
    $info['status'] = $found ? 1 : 0;
    $info['approve_status'] = $found ? 'onsale' : 'stockout';
    $info['detail_url'] = $found ? "http://item.wanggou.com/" . $itemId : $url; // product URL
    $info['title'] = $pick($name); // product name
    // The secondary pattern is consulted only when the primary one did not
    // match at all; a matched-but-empty primary value is kept as ''.
    $info['imagePhoto'] = isset($pic[1]) ? trim($pic[1]) : $pick($pic2); // product image
    $info['price'] = isset($price[1]) ? trim($price[1]) : $pick($price2); // promotional price, else original price
    $info['freight'] = $freightText ? $freightText : 0; // shipping cost; 0 when missing or empty
    $info['shopurl'] = $pick($shopname, 1); // shop URL
    $info['shopname'] = $pick($shopname, 2); // shop name
    $info['SellerCredit'] = $pick($SellerCredit); // seller credit
    $info['num'] = $pick($StockNum); // total stock
    $info['stockString'] = $pick($stockString); // raw stock string
    $info['shopType'] = "2"; // shopping site type: 1 = Taobao, 2 = other
    $info['shopWeb'] = "wanggou.com"; // shopping site

    /* Not used for now; may be needed later.
    $info['skus'] = explode("0#",$info['stockString']); // split the stock string
    $temp = array();
    foreach($info['skus'] as $k=>$v){
        if($v!=""){
            $temp1 = explode('|',$v); // separate colour from size
            $color = explode(':',$temp1[0]); // colour part
            $size = explode(':',$temp1[1]); // size part
            $itemInfo = explode('~',$size[1]); // the size part also carries price, quantity, etc.; split again
            $size[1] = $itemInfo[0]; // keep only the actual size
            list(,$price,$quantity,,) = explode(',',$itemInfo[1]); // extract price and quantity
            // assemble the data
            $temp[md516($color[1])]['color'] = $color[1];
            $temp[md516($color[1])][md516($size[1])] = array(
                'size'=>$size[1],
                'price'=>$price,
                '_price'=>_usaprice($price),
                'quantity'=>$quantity
            );
        }
    }
    $info['skus'] = $temp;*/

    return $info;
}
这个是最典型的,想要抓取别的网站的商品信息,按照这个稍微修改一下便是。支持原创,转载请标明转载来源!