本篇博客提供了4种方式：既有仅能满足要求、较为简陋的简略版，也有较为完整的实现方式。
1.
# -*- coding:utf-8 -*-
import re
import urllib2
import json
import sys
# Python 2 only: switch the interpreter-wide default string encoding to
# UTF-8 when it is not already set (presumably so that implicit
# str <-> unicode conversions of non-ASCII text do not fail -- confirm
# against the rest of the script).
if sys.getdefaultencoding() != 'utf-8':
    # sys.setdefaultencoding is removed from the sys namespace at startup;
    # reloading the module makes it available again.
    reload(sys)
    sys.setdefaultencoding('utf-8')
class JDSpider:
def loadPage(self):
url = "https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv566&productId=100001906474&score=0&sortType=5&page=1&pageSize=10&isShadowSku=0&rid=0&fold=1"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
headers = {'User-Agent': user_agent}
req = urllib2.Request(url,headers=headers)
response = urllib2.urlopen(req)
html = response.read()
pattern = re.compile('"content":"(.*?)".*?"nickname":"(.*?)"')
# print html.decode("gbk")
# pattern = re.compile(r',"creationTime"(.*?)"nickname":')
# html = p