# coding:utf-8
import requests
import bs4
import time
import xlwt
import random
def get_IP():
"""Fetch proxy IPs.

Downloads the listing page at http://www.xicidaili.com/nn/, locates the
table whose id is 'ip_list', and walks its rows (all but the first). For
each row whose sixth cell reads 'HTTP', the second and third cell texts
are stored under the 'ip' and 'port' keys of a fresh dict.

NOTE(review): the leading indentation of this function was lost in the
paste, and the visible body never appends to ip_list or returns a value;
the tail of the function appears to be missing. Restore it from the
original source before relying on this code.
"""
url = "http://www.xicidaili.com/nn/"
# Browser-like request headers sent with the page request.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',}
# Fetch the page body as text through a requests session.
session = requests.session()
html = session.get(url, headers = headers).text
# Parse the HTML with the lxml parser.
table = bs4.BeautifulSoup(html, 'lxml')
# All <tr> rows of the table with id="ip_list".
IP_lists = table.find('table', attrs={'id':'ip_list'}).find_all('tr')
ip_list = []
# Skip the first row -- presumably the table header; TODO confirm.
for IP_list in IP_lists[1:]:
lists = IP_list.find_all('td')
ip = {'ip': '', 'port': ''}
# Cell index 5 is presumably the protocol column; only 'HTTP' rows are kept.
if lists[5].text == 'HTTP':
# Cell 1 is stored as the address and cell 2 as the port.
ip['ip'] = lists[1].text
ip['port'] = lists[2].text
爬虫实战9:爬取1688网站商家信息
最新推荐文章于 2025-06-30 15:11:07 发布