import re
import requests
from bs4 import BeautifulSoup
# Step 1: obtain a working proxy
def proxy(path=r'ip_proxies\有效ip.txt', test_url='http://t66y.com/index.php', timeout=2):
    """Return the first proxy mapping in *path* through which *test_url* answers 200.

    Each non-blank line of the file is expected to hold one Python dict
    literal suitable for the ``proxies=`` argument of ``requests.get``.
    Lines are tried in file order; entries that cannot be parsed, are not
    dicts, or fail the probe request are skipped.

    Args:
        path: Text file (UTF-8) with one proxy dict literal per line.
        test_url: URL requested through each candidate proxy.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The first proxy dict whose probe returned HTTP 200, or ``None`` when
        no entry qualifies.

    Raises:
        OSError: If *path* cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Local import so the file's top-level import block needs no change.
    import ast

    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    for line in lines:
        text = line.strip()
        if not text:
            continue
        try:
            # SECURITY: the previous implementation used eval() here, which
            # executes arbitrary code found in the file. literal_eval only
            # accepts plain Python literals.
            candidate = ast.literal_eval(text)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            continue
        if not isinstance(candidate, dict):
            continue
        try:
            response = requests.get(test_url, proxies=candidate, timeout=timeout)
        except (requests.RequestException, ValueError):
            # Dead, slow or malformed proxy: best-effort, move on to the next.
            continue
        if response.status_code == 200:
            return candidate
    return None
# Select a proxy as soon as the script runs. proxy() returns None when no
# entry in the proxy list answered with HTTP 200, so `proxies` may be None.
proxies = proxy()
print(proxies)
# Step 2: build the pool of page links
# Site index page.
url = 'http://t66y.com/index.php'
# Presumably the thread-listing page for forum section fid=16 — TODO confirm.
url2 = 'http://t66y.com/thread0806.php?fid=16'
headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,\
image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
'Cache-Control': 'max-age=0',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683
爬虫系列,(3),达盖尔图片抓取
最新推荐文章于 2025-07-03 23:21:15 发布