1. 首先从纯真 IP 官网(http://www.cz88.net/)下载最新的 IP 数据库,并导出为文本文件。每行依次是起始 IP、结束 IP、所在地和运营商,格式如下:
0.0.0.0 0.255.255.255 IANA保留地址 CZ88.NET
1.0.0.0 1.0.0.255 澳大利亚 CZ88.NET
1.0.1.0 1.0.3.255 福建省 电信
1.0.4.0 1.0.7.255 澳大利亚 CZ88.NET
2. 根据每个 IP 段的起始 IP 到 ip138 查询其所在地,并把相邻且所在地、运营商都相同的 IP 段合并成一条记录,代码如下:
import urllib.parse
import urllib.request
from html.parser import HTMLParser
import time
# Module-level state shared across calls to get_location_from_ip().
# Despite the "constants" label these names are reassigned at runtime:
#   START    - first IP of the range run currently being accumulated
#   END      - last IP of the most recently started run
#   LOCATION - location text of the current run
#   NET      - network/ISP text of the current run
START = END = LOCATION = NET = ''
class MyHTMLParser(HTMLParser):
    """HTML parser that captures the region text of a lookup result page.

    After feeding a page, ``region`` holds the text that follows the
    '本站主数据' marker in a text node, or '' if no such node was seen.
    """

    def __init__(self):
        super().__init__()
        self.region = ''

    def handle_data(self, data):
        """Record the region when *data* starts with the result marker."""
        marker = '本站主数据'
        if data.startswith(marker):
            # Skip the marker and the single separator character after it.
            self.region = data[len(marker) + 1:]
def remove_invalid_space(line):
    """Split *line* on runs of whitespace and return the list of tokens.

    Leading/trailing whitespace (including the newline) is discarded, so a
    blank line yields an empty list.
    """
    tokens = line.split()
    return tokens
def format_one_line(line):
    """Normalise one tokenised record to ``[start, end, location, net]``.

    *line* is the token list of one input row: start IP, end IP, location,
    followed by zero or more tokens describing the network.  Because the
    network description may itself contain spaces, every token after the
    location is concatenated (without separator) into a single ``net``
    string.  A record with no network token gets ``net == ''``.

    Raises:
        IndexError: if *line* has fewer than three tokens.
    """
    start, end, location = line[0], line[1], line[2]
    # Join *all* trailing tokens rather than special-casing a few lengths,
    # so long descriptions are not silently truncated.
    net = ''.join(line[3:])
    return [start, end, location, net]
def _lookup_region(ip):
    """Query ip138 for *ip* and return the whitespace-split region tokens.

    Returns an empty list when the result page contains no region text.
    """
    query = urllib.parse.urlencode({'ip': ip, 'action': '2'})
    full_url = 'http://www.ip138.com/ips1388.asp' + '?' + query
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(full_url) as response:
        html = response.read().decode('GBK')
    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()
    return remove_invalid_space(parser.region)


def get_location_from_ip(line):
    """Look up the location of an IP range and record it in the temp file.

    *line* is a token list whose first two items are the start and end IP of
    the range.  The start IP is looked up online; the first region token is
    taken as the location and the last one (if there is more than one) as
    the network.  If both match the previously recorded range, the last line
    of the temp file is replaced by one spanning from the remembered START
    to this range's end; otherwise a new line is appended and the module
    state (START, END, LOCATION, NET) is reset to this range.

    When the lookup yields no region text, the range is written with empty
    location and network and is never merged with its neighbours.
    """
    global START, END, LOCATION, NET

    region = _lookup_region(line[0])
    location = region[0] if region else ''
    net = region[-1] if len(region) > 1 else ''

    # `location and ...` keeps unknown results (and the initial empty state)
    # from being treated as a match with the previous range.
    if location and LOCATION == location and NET == net:
        over_write_tmp_file(' '.join([START, str(line[1]), location, net]))
    else:
        write_to_tmp_file(
            ' '.join([str(line[0]), str(line[1]), location, net]))
        # Only a non-merged record starts a new run; START must keep
        # pointing at the first range of the run while merges continue.
        START = line[0]
        END = line[1]
        LOCATION = location
        NET = net
def write_to_tmp_file(line):
    """Append *line* followed by a newline to ``ip_tmp.txt``.

    The file is created in the current working directory if it does not
    exist.  If it cannot be opened because the path is missing,
    'file not found' is printed and nothing is written.
    """
    try:
        # `with` guarantees the handle is closed without inspecting locals().
        with open('ip_tmp.txt', 'a') as tmp_file:
            tmp_file.write(line + '\n')
    except FileNotFoundError:
        print('file not found')
def over_write_tmp_file(line):
    """Replace the last line of ``ip_tmp.txt`` with *line*.

    The file is read, its final line dropped, *line* plus a newline
    appended, and the result written back.  If the file does not exist yet,
    'file not found' is printed and the file is created containing only
    *line*.
    """
    try:
        with open('ip_tmp.txt') as tmp_file:
            kept = tmp_file.readlines()[:-1]
    except FileNotFoundError:
        print('file not found')
        # Nothing to keep; without this binding the rewrite below would
        # fail after having already truncated/created the file.
        kept = []

    kept.append(line + '\n')
    try:
        with open('ip_tmp.txt', 'w') as tmp_file:
            tmp_file.writelines(kept)
    except FileNotFoundError:
        print('file not found')
def format_ip_file(path):
    """Process every record of the IP range file at *path*.

    Each non-blank line is tokenised, normalised and passed to
    get_location_from_ip(), with a 0.1 s pause after each lookup to
    throttle requests.  Blank lines are skipped.  If a FileNotFoundError
    is raised (e.g. *path* does not exist), 'file not found' is printed.
    """
    try:
        with open(path) as ip_file:
            for raw_line in ip_file:
                tokens = remove_invalid_space(raw_line)
                if not tokens:
                    # Blank line (e.g. trailing newline at end of file):
                    # nothing to look up.
                    continue
                # main logic of get location from ip
                get_location_from_ip(format_one_line(tokens))
                time.sleep(0.1)
    except FileNotFoundError:
        print('file not found')
# Script entry point: process the exported IP range file.
print('start')
# Raw string so the backslashes in the Windows path are taken literally
# instead of being parsed as (invalid) escape sequences.
format_ip_file(r'D:\workspace\Python\ip\ip.txt')
print('end', end = '')
3. 最好在每次请求之间设置延时(代码中为 0.1 秒),否则过于频繁的请求可能会把 ip138 压垮。