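# Known issue: without a simulated login only 200 comments can be scraped, and even with a simulated login the scraper is still limited to 500 records.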
# -*- encoding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import re
import random
import time
# Use a session object so that login information is kept across requests.
s = requests.Session()
# Fetch a pool of proxy IPs so requests can be rotated and the IP is not banned.
def get_ip_list(url, headers):
    """Scrape ``ip:port`` strings from the table rows of an HTML page.

    The page at *url* is downloaded with ``requests.get`` and parsed with
    BeautifulSoup's ``lxml`` parser. Every ``<tr>`` after the first one is
    inspected, and the text of its second and third ``<td>`` cells is joined
    with a colon.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent with the request.

    Returns:
        A list of ``"<second cell text>:<third cell text>"`` strings, in page
        order. Rows with fewer than three ``<td>`` cells are skipped.
    """
    web_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(web_data.text, 'lxml')
    rows = soup.find_all('tr')
    ip_list = []
    # The first <tr> is skipped -- presumably the table's header row.
    for row in rows[1:]:
        tds = row.find_all('td')
        # Rows without at least three cells would raise IndexError below;
        # skip them instead of aborting the whole scrape.
        if len(tds) < 3:
            continue
        ip_list.append(tds[1].text + ':' + tds[2].text)
    return ip_list
# Randomly pick one IP from the list of proxy IPs.
def