一、在 PyCharm 中输入爬虫代码
代码如下:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import random
from time import sleep
import pymysql
# Request headers sent with every HTTP request to movie.douban.com.
# The Cookie value is assembled from adjacent string literals (one cookie
# pair per line); Python joins them at compile time into a single string.
h1 = {
    'Cookie': (
        'll="108309"; '
        'bid=Me0AOgr-hLE; '
        '__utma=30149280.1794767935.1702823028.1702823028.1702823028.1; '
        '__utmc=30149280; '
        '__utmz=30149280.1702823028.1.1.utmcsr=cn.bing.com|utmccn=(referral)|utmcmd=referral|utmcct=/; '
        '__utmb=30149280.1.10.1702823028; '
        '__utma=223695111.668481595.1702823036.1702823036.1702823036.1; '
        '__utmb=223695111.0.10.1702823036; '
        '__utmc=223695111; '
        '__utmz=223695111.1702823036.1.1.utmcsr=cn.bing.com|utmccn=(referral)|utmcmd=referral|utmcct=/; '
        '_pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1702823036%2C%22https%3A%2F%2Fcn.bing.com%2F%22%5D; '
        '_pk_id.100001.4cf6=88192bd6fadc2c98.1702823036.; '
        '_pk_ses.100001.4cf6=1; '
        'ap_v=0,6.0; '
        '_vwo_uuid_v2=D95348D3B6ABB211449A744F64EFB7A2C|8187f85b5505a6cd252bb47eecb59ad6; '
        '__yadk_uid=7vjVvsySJaC49VGXlGZ0qFGiPP7jme1g'
    ),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'movie.douban.com',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) '
        'Version/15.4 Safari/605.1.15'
    ),
    'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
    'Referer': 'https://movie.douban.com/subject/35267224/?from=showing',
    'Connection': 'keep-alive',
}
def trans_star(v_str):
"""转换评论星级"""
v_str = v_str[0]
if v_str == 'allstar10':
return '