php实现爬取知乎神回复数据——做成小程序上线_微信小程序爬虫知乎-CSDN博客

本文链接：https://blog.csdn.net/sinat_27719433/article/details/84257443

知乎真是一个神奇的地方，经常会看到很多令人忍俊不禁的神回复，初看之下拍案叫绝，细思之下更是回味无穷。这篇文章主要分享用 PHP 实现的爬取知乎神回复的简单爬虫代码（参考网上的 Python 版本改写而来，写得又臭又长），选取了几个比较感兴趣的话题进行爬取，其中比较有趣的还是程序员相关的话题。

知乎神回复都有些什么特点呢？我们先来观察一下：

在这里插入图片描述

大家看出什么规律了么？短小精辟有没有？赞同很多有没有？所以爬取知乎神回复我们只要爬取那些赞同多又字数少的回答就可以。直接上代码：

/**
 * Crawl every topic in the currently selected topic list.
 *
 * Calls get_answers() once per topic id, in list order.
 *
 * @param mixed $value Not used by this method.
 * @return int Always 1.
 */
public function getZHData($value='')
    {
        // Alternative topic lists kept for reference:
        // IT: $topic_ids = [19551556,19551052,19551151,19582090,19584190,19551137];
        // Lifestyle: $topic_ids=[19550429,19556784,19575422,19592882,19559915,19587000,19579062,19563237,19588633,19569846,19606425];
        $topics = [19554827,19562832,19559052,19552439,19558415,19580121,19594304,19555667,19555495,19604473,19624659];

        foreach ($topics as $topic) {
            $this->get_answers($topic);
        }

        return 1;
    }
    /**
     * Walk through the pages of one topic, starting at page 0.
     *
     * Requests page after page via get_answers_by_page() and stops as soon
     * as that call returns a truthy value.
     *
     * @param mixed $topic_id Topic id passed through to get_answers_by_page().
     */
    function get_answers($topic_id)
    {
        $page = 0;
        do {
            $finished = $this->get_answers_by_page($topic_id, $page);
            $page++;
        } while (!$finished);
    }

	/**
	 * Fetch one page of a topic's "essence" feed from the Zhihu v4 API and
	 * store short, highly upvoted answers in the question_a table.
	 *
	 * Every call is first recorded in the isread table (topic_id, page_no).
	 * An answer is stored when its content is shorter than 200 bytes, it has
	 * more than 1000 upvotes, and no row with the same anwser_id exists yet.
	 *
	 * @param mixed $topic_id Topic id placed into the API URL.
	 * @param int   $page_no  Zero-based page index; sent as item offset $page_no * 10.
	 * @return mixed Truthy when the caller should stop paging: either the API
	 *               reported the last page, or the response could not be
	 *               fetched/decoded or lacks paging information. Falsy when
	 *               more pages remain.
	 */
	function get_answers_by_page($topic_id, $page_no){
		DB::insert("insert into isread (topic_id,page_no) values(?,?)",[$topic_id, $page_no]);

		// The request uses limit=10, so the item offset advances by 10 per page.
		$offset = $page_no * 10;
		$url = "https://www.zhihu.com/api/v4/topics/".$topic_id. "/feeds/essence?include=data%5B%3F(target.type%3Dtopic_sticky_module)%5D.target.data%5B%3F(target.type%3Danswer)%5D.target.content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F(target.type%3Dtopic_sticky_module)%5D.target.data%5B%3F(target.type%3Danswer)%5D.target.is_normal%2Ccomment_count%2Cvoteup_count%2Ccontent%2Crelevant_info%2Cexcerpt.author.badge%5B%3F(type%3Dbest_answerer)%5D.topics%3Bdata%5B%3F(target.type%3Dtopic_sticky_module)%5D.target.data%5B%3F(target.type%3Darticle)%5D.target.content%2Cvoteup_count%2Ccomment_count%2Cvoting%2Cauthor.badge%5B%3F(type%3Dbest_answerer)%5D.topics%3Bdata%5B%3F(target.type%3Dtopic_sticky_module)%5D.target.data%5B%3F(target.type%3Dpeople)%5D.target.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics%3Bdata%5B%3F(target.type%3Danswer)%5D.target.annotation_detail%2Ccontent%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F(target.type%3Danswer)%5D.target.author.badge%5B%3F(type%3Dbest_answerer)%5D.topics%3Bdata%5B%3F(target.type%3Darticle)%5D.target.annotation_detail%2Ccontent%2Cauthor.badge%5B%3F(type%3Dbest_answerer)%5D.topics%3Bdata%5B%3F(target.type%3Dquestion)%5D.target.annotation_detail%2Ccomment_count&limit=10&offset=".$offset;

		$curl = curl_init();
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curl, CURLOPT_TIMEOUT, 500);
		// NOTE(review): TLS peer/host verification is switched off here, which
		// allows man-in-the-middle tampering with the response. Left unchanged
		// to avoid breaking hosts without a CA bundle; consider enabling it.
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
		curl_setopt($curl, CURLOPT_URL, $url);
		$body = curl_exec($curl);
		curl_close($curl);

		// curl_exec() yields false on transport failure; only decode real bodies.
		$res = is_string($body) ? json_decode($body, true) : null;

		// Without usable paging information there is no way to know whether
		// more pages exist. Report "end" so the caller's loop terminates
		// instead of requesting pages forever.
		if (!is_array($res) || !isset($res["paging"]["is_end"])) {
			return true;
		}
		$is_end = $res["paging"]["is_end"];

		if (!isset($res["data"]) || !is_array($res["data"])) {
			return $is_end;
		}

		foreach ($res["data"] as $item) {
			$target = isset($item["target"]) ? $item["target"] : null;
			// Feed entries without a "question" (or without a target at all) are skipped.
			if (!is_array($target) || !array_key_exists("question", $target)) {
				echo "string\n";
				continue;
			}

			$question_title = $target["question"]["title"];
			$anwser_con = $target["content"];
			$id = $target["id"];
			$voteup_count = strval($target["voteup_count"]);

			// strlen() measures bytes, not characters, so multi-byte text
			// reaches the 200 limit with far fewer visible characters.
			if (strlen($anwser_con) < 200 && (int) $voteup_count > 1000) {
				// $id comes from the remote response, so bind it rather than
				// interpolating it into the SQL text.
				$is_save = DB::select("select * from question_a where anwser_id=?", [strval($id)]);
				if (count($is_save) < 1) {
					$sql = "INSERT INTO question_a (question_title,anwser_con,voteup_count,topic_id,anwser_id)
	    	   		 values (?,?,?,?,?)";
					// NOTE(review): topic_id is stored as the constant 4, not
					// $topic_id — presumably a category marker; confirm intent.
					DB::insert($sql,[$question_title,$anwser_con,$voteup_count,4,$id]);
				} else {
					echo "已经有了";
				}
			}
		}

		return $is_end;
	}