"""Scrape proxy listings from xicidaili.com and store them in MySQL.

Running this script drops and re-creates the ``iplist`` table in the
``xici_proxy`` database, fetches listing pages 1 and 2 of
http://www.xicidaili.com/nn/ and inserts one row per proxy it can parse.
"""
import re
import socket
import struct

import pymysql
import requests
from bs4 import BeautifulSoup

BASE_URL = 'http://www.xicidaili.com/nn/{}'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'}
PAGES = range(1, 3)
# Without a timeout requests.get() may block forever on a stalled server.
REQUEST_TIMEOUT = 10  # seconds

# The table name is spelled exactly as in CREATE TABLE: MySQL table names
# can be case-sensitive, in which case dropping "IPLIST" would leave
# "iplist" in place and the CREATE below would fail on every re-run.
DROP_TABLE_SQL = "DROP TABLE IF EXISTS iplist"

# NOTE(review): MySQL documents LONG as a synonym for MEDIUMTEXT, so the
# integer ip is presumably stored as text; the column type is left as-is
# to keep the schema unchanged -- consider INT UNSIGNED.
CREATE_TABLE_SQL = '''create table iplist(
    id INT NOT NULL AUTO_INCREMENT,
    ip long,
    port int,
    address char(40),
    anony char(20),
    protocol char(20),
    speed char(40),
    time char(40),
    PRIMARY KEY ( id )
);
'''

# Placeholders instead of str.format(): the values come from scraped HTML,
# so the driver must do the quoting and escaping.
INSERT_SQL = '''insert into iplist (ip,port,address,anony,protocol,speed,time) values(%s,%s,%s,%s,%s,%s,%s);'''


def ip_to_int(ip):
    """Return the dotted-quad IPv4 string *ip* as an unsigned 32-bit integer.

    Raises:
        OSError: if ``socket.inet_aton`` rejects *ip*.
    """
    # "!I" reads the 4 packed bytes in network (big-endian) order, which
    # gives the same value as the native-order unpack followed by ntohl().
    return struct.unpack("!I", socket.inet_aton(ip))[0]


def fetch_rows(page):
    """Download listing page *page* and return its data ``<tr>`` elements.

    The first row of the ``ip_list`` table is skipped (treated as the
    header).  Returns an empty list when the page has no such table.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = BASE_URL.format(page)
    data = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(data, 'html.parser')
    table = soup.find('table', id='ip_list')
    if table is None:
        print('no ip_list table found at', url)
        return []
    return table.find_all('tr')[1:]


def parse_row(tr):
    """Extract the INSERT parameters from one table row.

    Cells are read by position: 1 = ip, 2 = port, 3 = address, 4 = anony,
    5 = protocol, 6 = speed (``title`` of the inner ``<div>``), 9 = time.

    Returns:
        A tuple ``(ip_num, port, address, anony, protocol, speed, time)``,
        or ``None`` when the row lacks the expected cells or carries an
        unparsable ip/port.
    """
    tds = tr.find_all('td')
    if len(tds) < 10:
        return None
    speed_div = tds[6].find('div')
    speed = speed_div.get('title') if speed_div is not None else None
    if speed is None:
        return None
    try:
        ip_num = ip_to_int(tds[1].text.strip())
        port = int(tds[2].text.strip())
    except (OSError, ValueError):
        return None
    return (
        ip_num,
        port,
        tds[3].text.strip(),
        tds[4].text.strip(),
        tds[5].text.strip(),
        speed.strip(),
        tds[9].text.strip(),
    )


def store_proxy(db, cursor, params):
    """Insert one parsed proxy row and commit; roll back on a database error.

    The statement and the stored values are printed either way.
    """
    # mogrify() renders the statement exactly as execute() will send it.
    print(cursor.mogrify(INSERT_SQL, params))
    try:
        cursor.execute(INSERT_SQL, params)
        db.commit()
    except pymysql.MySQLError as exc:
        db.rollback()
        print('回滚', exc)
    ip_num, port, address, anony, protocol, speed, time_text = params
    print('ip:', ip_num, 'port:', port, 'address:', address, 'anony:', anony,
          'protocol:', protocol, 'speed:', speed, 'time:', time_text)


def main():
    """Re-create the ``iplist`` table and fill it from the listing pages."""
    # Keyword arguments: recent PyMySQL releases no longer accept the
    # connection parameters positionally.
    db = pymysql.connect(host='localhost', user='root', password='oracle',
                         database='xici_proxy', use_unicode=True,
                         charset="utf8")
    try:
        cursor = db.cursor()
        cursor.execute(DROP_TABLE_SQL)
        cursor.execute(CREATE_TABLE_SQL)
        for page in PAGES:
            for tr in fetch_rows(page):
                params = parse_row(tr)
                if params is None:
                    continue
                store_proxy(db, cursor, params)
    finally:
        # Close the connection even when scraping or SQL setup fails.
        db.close()


if __name__ == "__main__":
    main()