# -*- coding: utf-8 -*-
"""
Created on Tue Aug 28 11:02:24 2018
@author: wenyun.wxw
"""
import requests
import re
def xiaodu(score, pages=717, timeout=30):
    """Fetch the de-duplicated review texts of JD product 6333836.

    Requests the JD comment endpoint page by page (``pageSize=10``) and
    extracts every ``"content"`` value that is immediately followed by a
    ``"creationTime"`` key in the raw response text.

    Args:
        score: Review filter forwarded as the ``score`` query parameter:
            0 = all, 1 = negative, 2 = neutral, 3 = positive,
            4 = reviews with pictures.
        pages: Number of pages to request, starting at page 0. The default
            of 717 keeps the range the function originally hard-coded.
        timeout: Seconds to wait for each HTTP response; without it a
            stalled connection would block the whole run indefinitely.

    Returns:
        A list of unique review strings in first-seen order.
    """
    url_template = (
        'https://sclub.jd.com/comment/productPageComments.action'
        '?callback=fetchJSON_comment98vv4566&productId=6333836'
        '&score={score}&sortType=5&page={page}'
        '&pageSize=10&isShadowSku=0&fold=1'
    )
    # Compiled once up front; the response is scanned as plain text rather
    # than parsed, so the pattern is the only extraction logic.
    content_pattern = re.compile(r'"content":"(.*?)","creationTime"')

    comments = []
    for page in range(pages):
        url = url_template.format(score=score, page=page)
        text = requests.get(url, timeout=timeout).text
        comments.extend(content_pattern.findall(text))

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is reproducible between runs (a set gives an arbitrary order).
    return list(dict.fromkeys(comments))
# Download the de-duplicated review texts once per score filter.
ratecontent_jd_less = xiaodu(score=0)    # all reviews
ratecontent_jd_less_1 = xiaodu(score=1)  # negative reviews
ratecontent_jd_less_2 = xiaodu(score=2)  # neutral reviews
ratecontent_jd_less_3 = xiaodu(score=3)  # positive reviews
#把客服的回复去掉
def kefu(ratecontent_jd_less, marker='您'):
    """Drop reviews that look like customer-service replies.

    A review is discarded when it contains ``marker``; everything else is
    kept in its original order.

    Args:
        ratecontent_jd_less: Iterable of review strings.
        marker: Substring that flags a review for removal. Defaults to
            '您', the heuristic this function originally hard-coded.

    Returns:
        A new list holding only the reviews that do not contain ``marker``.
    """
    return [review for review in ratecontent_jd_less if marker not in review]
# Strip customer-service replies from each rating bucket.
result1, result2, result3 = [
    kefu(reviews)
    for reviews in (
        ratecontent_jd_less_1,  # negative reviews
        ratecontent_jd_less_2,  # neutral reviews
        ratecontent_jd_less_3,  # positive reviews
    )
]
## 写入数据
#file = open('小度在家评价-京东.csv','w',encoding='gbk',errors='ignore')
#for i in list(range(len(ratecontent_jd))):
# file.write(ratecontent_jd[i]+'\n')
#file.close()