# xiaodu_jd.py-20180830

# -*- coding: utf-8 -*-
"""
Created on Tue Aug 28 11:02:24 2018

@author: wenyun.wxw
"""
import requests
import re

def xiaodu(score, pages=717, timeout=10):
    """Scrape JD.com review texts for product 6333836, de-duplicated.

    Requests the paginated ``productPageComments.action`` endpoint page by
    page and pulls every ``"content"`` value that is immediately followed by
    ``"creationTime"`` out of the raw response text with a regular expression.

    Args:
        score: Value for the ``score`` query parameter. Per the original
            author's note: 0 = all, 1 = negative, 2 = neutral, 3 = positive,
            4 = reviews with pictures.
        pages: Number of result pages to request, starting at page 0. The
            default of 717 (pages 0..716) matches the range that used to be
            hard-coded here.
        timeout: Seconds to wait for each HTTP response, so that a single
            stalled connection cannot hang the whole crawl.

    Returns:
        A list of unique review strings in first-seen order.

    Raises:
        requests.exceptions.RequestException: if a request fails or times out.
    """
    url_template = (
        'https://sclub.jd.com/comment/productPageComments.action'
        '?callback=fetchJSON_comment98vv4566&productId=6333836'
        '&score={score}&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
    )
    # Compiled once, outside the loop. The author's disabled code pulled the
    # "id" and "score" fields with similar patterns; only the text is kept.
    comment_pattern = re.compile('"content":"(.*?)","creationTime"')

    ratecontent_jd = []
    for page in range(pages):
        url = url_template.format(score=score, page=page)
        content = requests.get(url, timeout=timeout).text
        ratecontent_jd.extend(comment_pattern.findall(content))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (a set would return an arbitrary order).
    return list(dict.fromkeys(ratecontent_jd))

# Run one crawl per score filter at import time; each call to xiaodu()
# downloads its pages sequentially, so the four calls run one after another.
ratecontent_jd_less=xiaodu(0)  # score=0: all reviews
ratecontent_jd_less_1=xiaodu(1)  # score=1: negative reviews
ratecontent_jd_less_2=xiaodu(2)  # score=2: neutral reviews
ratecontent_jd_less_3=xiaodu(3)  # score=3: positive reviews

# Drop customer-service replies.
def kefu(ratecontent_jd_less):
    """Return the entries of *ratecontent_jd_less* that do not contain '您'.

    The polite pronoun '您' is used as the marker for a customer-service
    reply, so every string containing it is discarded. The remaining
    entries keep their original order.
    """
    return [text for text in ratecontent_jd_less if '您' not in text]

# Strip entries containing '您' from each per-rating list via kefu().
# The all-reviews list (ratecontent_jd_less) is not filtered here.
result1=kefu(ratecontent_jd_less_1)  # negative reviews
result2=kefu(ratecontent_jd_less_2)  # neutral reviews
result3=kefu(ratecontent_jd_less_3)  # positive reviews


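## Write the data to a file (disabled).
## NOTE(review): `ratecontent_jd` exists only inside xiaodu(); to re-enable this,
## iterate over one of the module-level lists (e.g. ratecontent_jd_less) instead.
#file = open('小度在家评价-京东.csv','w',encoding='gbk',errors='ignore')
#for i in list(range(len(ratecontent_jd))):
#    file.write(ratecontent_jd[i]+'\n')
#file.close()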

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值