这是我的第一篇blog,希望对大家有用。
为了方便更新Maxthon代理服务器,写了个python脚本。
处理proxy4free得到的代理服务器文件,验证代理服务器,
按照响应速度排序,生成maxthon配置格式输出。
下面为文件:proxy2maxthon.py
'''
Take the proxy information listed at http://www.proxy4free.com/page1.html
and output it as Maxthon (MyIE2-format) proxy settings.
Usage: copy the proxy information and save it as a text file named
proxylist.txt, one proxy per line, in the following format:
213.199.192.37 3128 transparent Poland 15.10.2004 Whois
156.110.47.251 8080 anonymous United States 16.10.2004 Whois
68.191.111.217 80 anonymous United States 16.10.2004 Whois
24.14.246.55 80 anonymous United States 16.10.2004 Whois
After running this program, paste the information it prints on the screen
into the proxy section of Maxthon's configuration file (setupcenter.ini in
the Config directory under the Maxthon installation directory).
Note: make sure Maxthon is not running while the configuration file is edited.
Author: invalid
Email: invalid@21cn.com
Date: 2004-10-16
'''
import os
import Queue
import sys
import threading
import time
import urllib
from string import strip
# Version information for this script.
version_info = (1, 2, "alpha", "20041016")

# Upper limit on the number of worker threads started to test proxies.
MAX_THREADS = 50

# Keep only the MAX_PROXYS fastest proxies.
MAX_PROXYS = 30

# Longest time, in seconds, to wait for the test page to come back.
HTTP_TIMEOUT = 10.0

# Site fetched through each proxy to test it.
TESTURL = "http://www.google.com/"

# Key text that marks a successfully returned page.
TESTRESP = "Google"
class Sorter:
    '''Sorting helper taken from the Python Cookbook.

    Sorts a list either by comparing whole items or by comparing one field
    of every item.  Instances are callable, so ``Sorter()(data, 2)`` is the
    same as ``Sorter().byItem(data, 2)``.
    '''

    def _helper(self, data, aux, inplace):
        '''Reorder *data* following the decorated list *aux*.

        aux holds one ``(key, position)`` pair per element of data and is
        sorted in place here.  Returns the reordered list; when inplace is
        true the contents of data are replaced with that order as well.
        '''
        aux.sort()
        result = [data[pos] for _key, pos in aux]
        if inplace:
            data[:] = result
        return result

    def byItem(self, data, itemindex=None, inplace=1):
        '''Sort *data*, optionally by the field at *itemindex*.

        With itemindex None the items themselves are compared; otherwise
        ``item[itemindex]`` is the sort key and items with equal keys keep
        their original relative order.  When inplace is true data itself is
        reordered, otherwise data is left untouched.  The sorted list is
        returned in both cases.
        '''
        if itemindex is None:
            if inplace:
                data.sort()
                return data
            result = data[:]
            result.sort()
            return result
        # Pair every key with its position so that ties are broken by the
        # original order and the items themselves are never compared.
        aux = [(item[itemindex], pos) for pos, item in enumerate(data)]
        return self._helper(data, aux, inplace)

    # a couple of handy synonyms
    sort = byItem
    __call__ = byItem
def TespProxyDirect(proxy):
    '''Test one proxy by fetching TESTURL through it, with no timeout.

    The fetch blocks for as long as the proxy takes to answer, so testing
    many proxies this way is slow.  The name (spelled "Tesp") is left
    unchanged so existing callers keep working.

    Args:
        proxy: proxy address in the form ``ip:port``.

    Returns:
        A tuple ``(ok, usedtime)``.  ok is 1 when the returned page contains
        TESTRESP and 0 otherwise.  usedtime is the time the fetch took in
        seconds, or 0 when the fetch failed.
    '''
    iret = 0
    usedtime = 0
    proxies = {'http': 'http://' + proxy}
    try:
        tstart = time.time()
        filehandle = urllib.urlopen(TESTURL, proxies=proxies)
        try:
            data = filehandle.readlines()
        finally:
            # Close the connection even when reading the page fails.
            filehandle.close()
        usedtime = time.time() - tstart
        for line in data:
            # Substring test, so a match at the start of a line counts too.
            if TESTRESP in line:
                iret = 1
                break
    except Exception:
        # Best effort: any failure just means the proxy is reported as
        # not working.
        pass
    return (iret, usedtime)
def TestProxy(workQueue, resultQueue):
    '''Worker thread body: time each queued proxy and report the working ones.

    Takes ``(proxy, local)`` pairs from workQueue, without blocking, until
    the queue is empty and then returns.  Every proxy is tested in a helper
    thread that is waited on for at most HTTP_TIMEOUT seconds.  When the
    helper reports a time within that limit, ``(proxy, local, usedtime)`` is
    put on resultQueue, with usedtime in milliseconds.  Proxies that fail,
    time out, or return a page without TESTRESP are dropped silently.
    '''

    def SubthreadProc(proxy, result):
        '''Fetch TESTURL through *proxy* and append the outcome to *result*.

        Appends the elapsed time in milliseconds when the page contains
        TESTRESP, -1 when the fetch raises, and nothing when the page does
        not contain TESTRESP.
        '''
        # The address comes from the argument rather than from the worker's
        # loop variable, which is rebound for the next proxy while a timed
        # out helper may still be running.
        proxies = {'http': 'http://' + proxy}
        try:
            tstart = time.time()
            filehandle = urllib.urlopen(TESTURL, proxies=proxies)
            try:
                data = filehandle.readlines()
            finally:
                # Close the connection even when reading the page fails.
                filehandle.close()
            usedtime = (time.time() - tstart) * 1000
            for line in data:
                # Substring test, so a match at the start of a line counts.
                if TESTRESP in line:
                    result.append(usedtime)
                    break
        except Exception:
            result.append(-1)

    while 1:
        # Continue pulling data from the work queue until it's empty
        try:
            proxy, local = workQueue.get(0)
        except Queue.Empty:
            # work queue is empty--exit the thread proc.
            return
        # Create a single subthread to do the actual work
        result = []
        subThread = threading.Thread(target=SubthreadProc,
                                     args=(proxy, result))
        # Daemonize the subthread so that even if a few are hanging
        # around when the process is done, the process will exit.
        subThread.setDaemon(True)
        # Run the subthread and wait for it to finish, or time out
        subThread.start()
        subThread.join(HTTP_TIMEOUT)
        # An empty result means the subthread has not answered yet (timed
        # out); -1 means the fetch raised.  Only a measured time is stored.
        if result and result[0] != -1:
            resultQueue.put((proxy, local, result[0]))
def GenMaxthonCfg(proxys):
    '''Print the tested proxies in the format Maxthon's config file needs.

    Args:
        proxys: list of ``(address, name, usedtime)`` records, where address
            is ``ip:port`` and usedtime is the fetch time in milliseconds.
            The list is sorted in place by usedtime, fastest first.

    Prints a table of every proxy between two banner lines, then the
    ``p``/``ps``/``pn`` lines for at most MAX_PROXYS proxies, leaving out
    those slower than the timeout below.
    '''
    # Number given to the first generated entry.
    index = 4
    # Longest time, in milliseconds, a proxy may take to fetch the test
    # page; slower proxies are discarded.
    timeout = 10000
    # Sort by fetch time, fastest first.
    sorter = Sorter()
    sorter(proxys, 2)
    banner = "*****************************************************"
    # Each print gets one pre-formatted string so the output is the same
    # whether print is a statement or a function.
    print(banner)
    for item in proxys:
        print("%s %s %d" % (item[0], item[1], int(item[2])))
    print(banner)
    print("把下面输入粘贴到maxthon的setupcenter.ini配置文件的proxy节")
    count = 0
    for item in proxys:
        # Stop once MAX_PROXYS entries are written; checking before the
        # entry is printed keeps the output from running one entry over.
        if count >= MAX_PROXYS:
            break
        if int(item[2]) > timeout:
            continue
        # A Maxthon proxy entry consists of these options:
        #   p1=http=210.230.192.39:3128
        #   ps1=3
        #   pn1=japan
        print("p%d=http=%s" % (index, item[0]))
        print("ps%d=3" % index)
        print("pn%d=%s%d" % (index, item[1], index))
        index += 1
        count += 1
def main():
    '''Read proxylist.txt, test every listed proxy and print the results.

    Each usable line of proxylist.txt is queued as ``(ip:port, label)``.
    Lines mentioning China, lines with fewer than six fields and repeated
    IP addresses are skipped.  Up to MAX_THREADS worker threads running
    TestProxy drain the queue, and the proxies that passed are handed to
    GenMaxthonCfg for printing.  Exits with status 1 when proxylist.txt
    cannot be opened.
    '''
    # Open the proxy list file and start processing.
    try:
        listfile = open("proxylist.txt", "r")
    except IOError:
        print("open proxylist file error")
        sys.exit(1)
    try:
        data = listfile.readlines()
    finally:
        listfile.close()
    # One pre-formatted string per print keeps the output the same whether
    # print is a statement or a function.
    print("proxylist file have proxy: %d" % len(data))

    urls = []
    workQueue = Queue.Queue()
    for line in data:
        # Proxy servers located in China are not used.
        if "China" in line:
            continue
        # Split on any whitespace so a row is parsed the same way with or
        # without a trailing space before the newline.
        fields = line.split()
        if len(fields) < 6:
            continue
        ip = fields[0]
        port = fields[1]
        # The token just before the date and "Whois" columns, i.e. the
        # last word of the country name, labels the proxy.
        local = fields[-3]
        if ip in urls:
            # This proxy server was already listed.
            continue
        urls.append(ip)
        workQueue.put((ip + ":" + port, local))

    # No more threads than there are proxies waiting to be tested.
    numThreads = min(MAX_THREADS, workQueue.qsize())
    workers = []
    resultQueue = Queue.Queue()
    # Create worker threads to load-balance the retrieval
    print("create threads...")
    for threadNum in range(numThreads):
        worker = threading.Thread(target=TestProxy,
                                  args=(workQueue, resultQueue))
        workers.append(worker)
        worker.start()
    # Wait for all the workers to finish
    print("waiting threads finish...")
    for w in workers:
        w.join()
    print("threads done.")

    # Records of the proxies that passed the test:
    # (address:port, label, time taken to fetch the test page).
    proxys = []
    while not resultQueue.empty():
        proxy, local, result = resultQueue.get(0)
        proxys.append((proxy, local, int(result)))
    GenMaxthonCfg(proxys)


if __name__ == '__main__':
    main()