import urllib.request
import re
import os
import time
import sched
# Helper that appends an error entry to the log file
def write_error_log(error_str):
    """Append a timestamped error entry to ``error_log.txt`` under ``data_path``.

    Each entry is two lines: a header made of 30 dashes, the local time
    formatted as ``%Y-%m-%d %H:%M:%S`` and 30 more dashes, followed by
    ``str(error_str)``.

    Logging is best-effort: any exception raised while opening or writing
    the file is printed to stdout and not propagated.

    Args:
        error_str: The value to record; it is converted with ``str()``.
    """
    dashes = "-" * 30
    try:
        # The context manager closes the handle even when a write fails,
        # which the previous manual open()/close() pair did not guarantee.
        with open(data_path + "/error_log.txt", "a") as fh:
            stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
            fh.write(dashes + stamp + dashes + "\n")
            fh.write(str(error_str) + "\n")
    except Exception as e:
        print(e)
# Index page that is scanned for links, and the local directory that receives
# the downloaded pages and the error log.
url="http://blog.youkuaiyun.com/"
data_path="d:/优快云"
# Fetch the index page once, when this module is executed. The context manager
# closes the HTTP response deterministically instead of leaving it to the
# garbage collector. Undecodable bytes are dropped ("ignore").
with urllib.request.urlopen(url, timeout=60) as response:
    data = response.read().decode("UTF-8", "ignore")
# Captures the quoted href value of the nearest `><a href="` sequence that
# follows each `<h3 `; re.S lets `.` match newlines so a match may span lines.
pat = "<h3 .*?><a href=\"(.*?)\""
alllink = re.compile(pat, re.S).findall(data)
def get_link(wait_time):
    """Download every URL in ``alllink`` into ``data_path``, then reschedule.

    Each link is printed and saved as ``<milliseconds-since-epoch>.html``
    inside ``data_path`` (created if missing). A failed download is recorded
    through ``write_error_log`` and the loop moves on to the next link.
    After the loop the function re-registers itself on ``schedule`` so it
    runs again ``wait_time`` seconds later.

    ``alllink`` is read from module scope as-is; this function does not
    refresh it.

    Args:
        wait_time: Delay in seconds passed to ``schedule.enter`` for the
            next run.
    """
    # exist_ok replaces the separate exists() check and its
    # check-then-create race.
    os.makedirs(data_path, exist_ok=True)
    for link in alllink:
        print(link)
        target = data_path + "/" + str(int(round(time.time() * 1000))) + ".html"
        try:
            urllib.request.urlretrieve(link, target)
        except (OSError, ValueError) as e:
            # URLError/HTTPError are OSError subclasses, so they are still
            # handled here. ValueError covers hrefs urllib rejects as
            # "unknown url type" (e.g. relative links); previously one such
            # link aborted the whole run and the reschedule below.
            logged = False
            if hasattr(e, "code"):
                write_error_log(e.code)
                logged = True
            if hasattr(e, "reason"):
                write_error_log(e.reason)
                logged = True
            if not logged:
                # Errors without code/reason are logged as the exception itself.
                write_error_log(e)
    schedule.enter(wait_time, 0, get_link, (wait_time,))
# Module-level scheduler; get_link() re-registers itself on it after each run.
schedule = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)

# Program entry point
if __name__ == "__main__":
    wait_time = 5
    # First run fires immediately (delay 0); later runs are queued by get_link.
    schedule.enter(
        delay=0,
        priority=0,
        action=get_link,
        argument=(wait_time,),
    )
    schedule.run()
import re
import os
import time
import sched
# Helper that appends an error entry to the log file
def write_error_log(error_str):
    """Append a timestamped error entry to ``error_log.txt`` under ``data_path``.

    Each entry is two lines: a header made of 30 dashes, the local time
    formatted as ``%Y-%m-%d %H:%M:%S`` and 30 more dashes, followed by
    ``str(error_str)``.

    Logging is best-effort: any exception raised while opening or writing
    the file is printed to stdout and not propagated.

    Args:
        error_str: The value to record; it is converted with ``str()``.
    """
    dashes = "-" * 30
    try:
        # The context manager closes the handle even when a write fails,
        # which the previous manual open()/close() pair did not guarantee.
        with open(data_path + "/error_log.txt", "a") as fh:
            stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
            fh.write(dashes + stamp + dashes + "\n")
            fh.write(str(error_str) + "\n")
    except Exception as e:
        print(e)
# Index page that is scanned for links, and the local directory that receives
# the downloaded pages and the error log.
url="http://blog.youkuaiyun.com/"
data_path="d:/优快云"
# Fetch the index page once, when this module is executed. The context manager
# closes the HTTP response deterministically instead of leaving it to the
# garbage collector. Undecodable bytes are dropped ("ignore").
with urllib.request.urlopen(url, timeout=60) as response:
    data = response.read().decode("UTF-8", "ignore")
# Captures the quoted href value of the nearest `><a href="` sequence that
# follows each `<h3 `; re.S lets `.` match newlines so a match may span lines.
pat = "<h3 .*?><a href=\"(.*?)\""
alllink = re.compile(pat, re.S).findall(data)
def get_link(wait_time):
    """Download every URL in ``alllink`` into ``data_path``, then reschedule.

    Each link is printed and saved as ``<milliseconds-since-epoch>.html``
    inside ``data_path`` (created if missing). A failed download is recorded
    through ``write_error_log`` and the loop moves on to the next link.
    After the loop the function re-registers itself on ``schedule`` so it
    runs again ``wait_time`` seconds later.

    ``alllink`` is read from module scope as-is; this function does not
    refresh it.

    Args:
        wait_time: Delay in seconds passed to ``schedule.enter`` for the
            next run.
    """
    # exist_ok replaces the separate exists() check and its
    # check-then-create race.
    os.makedirs(data_path, exist_ok=True)
    for link in alllink:
        print(link)
        target = data_path + "/" + str(int(round(time.time() * 1000))) + ".html"
        try:
            urllib.request.urlretrieve(link, target)
        except (OSError, ValueError) as e:
            # URLError/HTTPError are OSError subclasses, so they are still
            # handled here. ValueError covers hrefs urllib rejects as
            # "unknown url type" (e.g. relative links); previously one such
            # link aborted the whole run and the reschedule below.
            logged = False
            if hasattr(e, "code"):
                write_error_log(e.code)
                logged = True
            if hasattr(e, "reason"):
                write_error_log(e.reason)
                logged = True
            if not logged:
                # Errors without code/reason are logged as the exception itself.
                write_error_log(e)
    schedule.enter(wait_time, 0, get_link, (wait_time,))
# Module-level scheduler; get_link() re-registers itself on it after each run.
schedule = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)

# Program entry point
if __name__ == "__main__":
    wait_time = 5
    # First run fires immediately (delay 0); later runs are queued by get_link.
    schedule.enter(
        delay=0,
        priority=0,
        action=get_link,
        argument=(wait_time,),
    )
    schedule.run()