# 主程序 (main program)
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib.request, http.cookiejar, re
import time
import threading, sys
# tools是我的自定义工具类
import tools
import socket
'''
模拟访问博客增加访问量
'''
# Every socket operation in this process gives up after one second.
socket.setdefaulttimeout(1.0)

# Number of visit rounds: the first command-line argument when one is given,
# otherwise 2.
visitnum = int(sys.argv[1]) if len(sys.argv) > 1 else 2
class Csdn(threading.Thread):
    """Worker thread that requests every article linked from one list page.

    An instance is bound to one list page of ``username``'s blog.  When run,
    it obtains that page's HTML (from the shared class-level cache when it is
    already there), extracts the article links and opens each of them once.
    """

    # Shared cache of list-page HTML, keyed by 'page_<n>'.
    contents = {}
    # Request headers installed on every opener built by this class.
    headers = [('host', 'blog.youkuaiyun.com'),
               ('User-Agent',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
               ]
    domain = 'http://blog.youkuaiyun.com'
    # List-page URL template: (username, page number).
    url = 'http://blog.youkuaiyun.com/%s/article/list/%s'

    # Patterns used to scrape the list page.
    _PAGE_COUNT_RE = re.compile(
        r'<div id="papelist" class="pagelist">([\s\S]*?)共(\d+)页')
    _ARTICLE_LIST_RE = re.compile(
        r'<div id="article_list" ([\s\S]*)<div id="papelist" class="pagelist">')
    _ARTICLE_RE = re.compile(
        r'<div class="list_item article_item">([\s\S]*?)<span class="link_title"><a href="(.*?)">([\s\S]*?)</a></span>([\s\S]*?)阅读</a>\((\d+)\)')

    def __init__(self, username, page, times):
        """Bind the worker to one list page.

        Args:
            username: Blog owner name inserted into the list-page URL.
            page: 1-based list-page number this worker handles.
            times: Index of the current visit round (used in log output).
        """
        super().__init__()
        self.username = username
        self.createOpener()
        self.page = page
        self.times = times

    @staticmethod
    def _buildOpener():
        """Return a cookie-aware opener that sends the shared headers."""
        cookie = http.cookiejar.CookieJar()
        cookieProc = urllib.request.HTTPCookieProcessor(cookie)
        opener = urllib.request.build_opener(cookieProc)
        # The same list object is shared on purpose, so headers appended
        # later (the Referer) are seen by openers that already exist.
        opener.addheaders = Csdn.headers
        return opener

    @staticmethod
    def getListPages(username, page=1, count=1):
        """Fetch list pages from ``page`` onwards and return the page count.

        The HTML of every page fetched is stored in ``Csdn.contents`` so
        worker threads can reuse it.

        Args:
            username: Blog owner name.
            page: Page number to fetch in this call.
            count: Total page count known so far; used to decide whether a
                failed page can be skipped.

        Returns:
            The total page count reported by the fetched page, or ``count``
            when this page could not be fetched and was skipped.

        Raises:
            Exception: If the request fails and there is no later page to
                fall back to, or the page has no pagination marker.
        """
        key = 'page_' + str(page)
        if page == 1:
            referer = ('Referer', 'http://blog.youkuaiyun.com/' + username)
            # Guard against piling up duplicate headers on repeated calls.
            if referer not in Csdn.headers:
                Csdn.headers.append(referer)
        opener = Csdn._buildOpener()
        url = Csdn.url % (username, page)
        try:
            response = opener.open(url)
        except Exception as e:
            if count > page:
                # Skip the page that failed and carry on with the rest;
                # there is no response to parse here, so report the total
                # that is already known.
                Csdn.getListPages(username, page + 1, count)
                return count
            raise Exception('出错了') from e
        with response:
            contents = response.read().decode('utf-8', 'ignore')
        Csdn.contents[key] = contents
        match = Csdn._PAGE_COUNT_RE.search(contents)
        if match is None:
            raise Exception('pagination marker not found in %s' % url)
        pages = int(match.group(2))
        if pages > page:
            Csdn.getListPages(username, page + 1, pages)
        return pages

    def createOpener(self):
        """Create this worker's own opener and store it on ``self.opener``."""
        self.opener = Csdn._buildOpener()

    def visitUrl(self):
        """Load this worker's list page, then visit the articles on it."""
        key = 'page_' + str(self.page)
        cached = Csdn.contents.get(key)
        if cached is not None:
            print('--------%s exists' % key)
            self.contents = cached
        else:
            url = Csdn.url % (self.username, self.page)
            try:
                with self.opener.open(url) as response:
                    contents = response.read().decode('utf-8', 'ignore')
            except Exception:
                print("访问 %s 出错 " % url)
                return
            self.contents = contents
            Csdn.contents[key] = contents
        self.addVisitNum()

    def addVisitNum(self):
        """Open every article URL found in ``self.contents`` once.

        A failed article request is reported and skipped.  If the article
        list cannot be located in the HTML, a notice is printed and nothing
        is requested.
        """
        listMatch = Csdn._ARTICLE_LIST_RE.search(self.contents)
        if listMatch is None:
            print("page %s: article list not found, nothing to visit" % self.page)
            return
        for article in Csdn._ARTICLE_RE.finditer(listMatch.group(1)):
            articleUrl = Csdn.domain + article.group(2)
            try:
                # The body is never read; close at once to free the socket.
                self.opener.open(articleUrl).close()
            except Exception:
                print("当前第%s次访问,访问出错,url:%s" % (self.times, articleUrl))
                continue
            print("当前第%s次访问,第%s页,url:%s" % (self.times, self.page, articleUrl))

    def run(self):
        """Thread entry point."""
        self.visitUrl()
class MainCsdn(threading.Thread):
    """Thread that performs one visit round over all list pages."""

    def __init__(self, username, times, pages):
        """Remember the blog owner, the round index and the page count."""
        super().__init__()
        self.username = username
        self.times = times
        self.pages = pages

    def run(self):
        """Start one ``Csdn`` worker per list page and wait for all of them."""
        workers = []
        for page_no in range(1, self.pages + 1):
            worker = Csdn(self.username, page_no, times=self.times)
            worker.start()
            workers.append(worker)
        # Block until every page worker of this round has finished.
        for worker in workers:
            worker.join()
        print(self.times, " 退出第%s次执行任务" % self.times)
@tools.runTime
def main():
    """Run ``visitnum`` visit rounds in parallel and wait for them to end."""
    # CSDN nickname of the blog owner.
    username = 'csdn昵称'
    pages = Csdn.getListPages(username, 1, 1)
    rounds = []
    for round_no in range(1, visitnum + 1):
        round_thread = MainCsdn(username=username, times=round_no, pages=pages)
        round_thread.start()
        rounds.append(round_thread)
    # Block until every round has finished.
    for round_thread in rounds:
        round_thread.join()
    print("退出主线程")


if __name__ == '__main__':
    main()
#!/usr/bin/python
# -*- coding:utf-8 -*-
import time
'''
自定义工具方法,tools.py
'''
def runTime(func):
    """Decorator that prints how long each call of ``func`` takes.

    Args:
        func: Callable to time.

    Returns:
        A wrapper that calls ``func`` with the same arguments, prints the
        elapsed time in seconds and returns ``func``'s result.
    """
    # Local import so the decorator adds no new module-level dependency.
    from functools import wraps

    @wraps(func)
    def newFunc(*args, **kwargs):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        end = time.perf_counter()
        print("read: %f s" % (end - start))
        return res
    return newFunc
def log(content, file='test.log', type=1):
    """Write ``str(content)`` to a UTF-8 text file.

    Args:
        content: Value to write; converted with ``str()``.
        file: Path of the target file.
        type: ``1`` appends to the file; any other value truncates it first.
    """
    mode = 'a+' if type == 1 else 'w+'
    # The context manager closes (and so flushes) the file deterministically.
    with open(file, mode, encoding='utf-8') as f:
        f.write(str(content))