import requests
from random import choice
from time import strftime
# Pool of User-Agent header values; one is picked at random for every request.
ua = [
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',
]


def write_log(*args):
    """Append the repr of every argument to today's log file.

    The file is named ``YYYYMMDD.txt`` and is created in the current working
    directory. Each argument goes on its own line and the record is closed
    with one blank line.
    """
    with open('%s.txt' % strftime('%Y%m%d'), 'a', encoding='utf-8') as f:
        for a in args:
            # Wrap in a 1-tuple: a bare tuple on the right of ``%`` would be
            # unpacked as several format arguments and raise TypeError.
            f.write('%r\n' % (a,))
        f.write('\n')


def _fetch(method, url, times, log_args=(), **request_kwargs):
    """Send one HTTP request, retrying until it answers with status 200.

    A request is attempted while the remaining budget is >= 0, so at most
    ``times + 1`` requests are sent. Every failure (exception or non-200
    status) is logged together with the budget left at that moment.

    :param method: HTTP verb handed to ``requests.request``.
    :param url: target URL.
    :param times: retry budget; a negative value sends nothing.
    :param log_args: extra values logged between the URL and the failure.
    :param request_kwargs: extra keyword arguments for ``requests.request``.
    :return: the 200 response, or ``None`` when the budget is exhausted.
    """
    remaining = times
    while remaining >= 0:
        try:
            r = requests.request(
                method,
                url,
                headers={'User-Agent': choice(ua)},
                timeout=30,
                **request_kwargs
            )
        except Exception as e:
            # Deliberately broad: these helpers are best-effort and must
            # never propagate a network failure to the caller.
            write_log(url, *log_args, e, remaining)
        else:
            if r.status_code == 200:
                return r
            write_log(url, *log_args, r.status_code, remaining)
        remaining -= 1
    return None


def get(url, encode='utf-8', times=10):
    """GET ``url`` and return the body decoded with ``encode``.

    :return: the response text, or ``''`` when every attempt failed.
    """
    r = _fetch('GET', url, times)
    if r is None:
        return ''
    r.encoding = encode
    return r.text


def post(url, data, encode='utf-8', times=10):
    """POST ``data`` to ``url`` and return the body decoded with ``encode``.

    :return: the response text, or ``''`` when every attempt failed.
    """
    r = _fetch('POST', url, times, log_args=(data,), data=data)
    if r is None:
        return ''
    r.encoding = encode
    return r.text


def get_img(url, times=3):
    """Download an image and return its raw bytes.

    :return: the response content, or ``b''`` when every attempt failed.
    """
    r = _fetch('GET', url, times)
    return b'' if r is None else r.content
# Class-based version
# request.py
from random import choice
from time import time, sleep, strftime

from chardet import detect  # auto-detect the page encoding
from requests import get, post

from core.conf import REQUEST as R, Color as C
def write_log(*args, **kwargs):
    """Print a failure record in red and append it to today's log file.

    The file is named ``YYYYMMDD.txt`` and is created in the current working
    directory. Positional values are written one repr per line, keyword
    values as ``repr(key)<TAB>repr(value)``, followed by one blank line.
    """
    C.red(args, kwargs)
    with open('%s.txt' % strftime('%Y%m%d'), 'a', encoding='utf-8') as f:
        for i in args:
            # Wrap in a 1-tuple: a bare tuple on the right of ``%`` would be
            # unpacked as several format arguments and raise TypeError.
            f.write('%r\n' % (i,))
        for k, v in kwargs.items():
            f.write('%r\t%r\n' % (k, v))
        f.write('\n')


def get_proxies():
    """Return the proxies mapping handed to requests (currently none)."""
    return {}


class Response:
    """Empty stand-in returned when every attempt of a request failed."""

    text = ''

    @classmethod
    def json(cls):
        """Return an empty dict, mirroring ``requests.Response.json()``."""
        return dict()


class Request:
    """HTTP client with random User-Agent, random delay and retries.

    The ``headers`` dict is reused (and updated) across calls; a dict passed
    as ``header`` is used as-is, not copied.
    """

    def __init__(self, header=None):
        self.t = time()  # creation time, reported by __del__
        self.headers = header or dict()

    def __del__(self):
        # Report how long this object lived, in minutes.
        minutes = (time() - self.t) / 60
        C.yellow('%.2f分钟' % minutes)

    def _send(self, sender, url, times, payload, kwargs):
        """Call ``sender`` until it answers 200 or the budget runs out.

        A request is attempted while the remaining budget is >= 0, so at
        most ``times + 1`` requests are sent. No delay is taken once the
        budget is exhausted, because nothing will be sent afterwards.

        :param sender: ``requests.get`` or ``requests.post``.
        :param payload: extra fields that are both sent and logged
            (``{'data': ...}`` for POST, empty for GET).
        :param kwargs: the caller's pass-through keyword arguments.
        :return: the 200 response with ``encoding`` set from chardet, or
            ``None`` when every attempt failed.
        """
        remaining = times
        while remaining >= 0:
            sleep(choice(range(R.sleep)))  # random delay; needs R.sleep >= 1
            self.headers['User-Agent'] = choice(R.ua)
            try:
                r = sender(
                    url=url,
                    headers=self.headers,
                    timeout=R.timeout,
                    proxies=get_proxies(),
                    **payload,
                    **kwargs
                )
            except Exception as error:
                # Deliberately broad: log the failure and try again.
                write_log(url, **payload, error=error, **kwargs)
            else:
                if r.status_code == 200:
                    r.encoding = detect(r.content)['encoding']
                    return r
                write_log(url, **payload, status_code=r.status_code, **kwargs)
            remaining -= 1
        return None

    def get(self, url, times=R.times, **kwargs):
        """GET ``url``; extra keyword arguments go to ``requests.get``.

        On success the URL is remembered as ``Referer`` for later requests.

        :return: the response, or an empty ``Response`` when all attempts
            failed.
        """
        r = self._send(get, url, times, {}, kwargs)
        if r is None:
            return Response()  # empty response
        self.headers['Referer'] = url
        return r

    def post(self, url, data, times=R.times, **kwargs):
        """POST ``data`` to ``url``; extra keyword arguments go to
        ``requests.post``.

        :return: the response, or an empty ``Response`` when all attempts
            failed.
        """
        r = self._send(post, url, times, {'data': data}, kwargs)
        if r is None:
            return Response()  # empty response
        return r


if __name__ == '__main__':
    r = Request()
    for i in range(2, 4):
        response = r.get('https://blog.youkuaiyun.com/Yellow_python/article/list/%d?' % i)
        C.green(response.url)
        C.cyan(response.encoding)
        C.blue(response.cookies)
        C.purple(response.headers)
        C.darkcyan(response.request)
        C.yellow(response.elapsed)  # request duration
        C.underline(r.headers)
        print()
# conf.py
class REQUEST:
    """Settings read by the HTTP request helpers."""

    # Pool of User-Agent header values; one is picked at random per request.
    ua = [
        'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',  # IE 9.0
        'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)',  # IE 8.0
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT6.0)',  # IE 7.0
        'Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)',  # IE 6.0
        'Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox 4.0.1 - Mac
        'Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox 4.0.1 - Windows
        'Opera/9.80(Macintosh;IntelMacOSX10.6.8;U;en)Presto/2.8.131Version/11.11',  # Opera 11.11 - Mac
        'Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11',  # Opera 11.11 - Windows
        'Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11',
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',  # Maxthon
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',  # Tencent Traveler (TT)
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',  # 360 Browser
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TheWorld)',  # TheWorld 3.x
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    ]
    sleep = 1  # SLEEP >= 1
    timeout = 10
    times = 10


class MySQL:
    """Connection settings for the MySQL database."""

    host = 'localhost'
    user = 'root'
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from the environment instead.
    password = 'yellow'
    charset = 'utf8'
    db = 'z' * 3


class Color:
    """Print helpers that wrap each value in an ANSI escape sequence."""

    @classmethod
    def _wrap_colour(cls, colour, *args):
        """Print every value on its own line, prefixed with ``colour`` and
        followed by the ANSI reset sequence."""
        for a in args:
            print(colour + '{}'.format(a) + '\033[0m')

    @classmethod
    def blue(cls, *args):
        """Print each value using escape code ``94``."""
        return cls._wrap_colour('\033[94m', *args)

    @classmethod
    def bold(cls, *args):
        """Print each value using escape code ``1``."""
        return cls._wrap_colour('\033[1m', *args)

    @classmethod
    def cyan(cls, *args):
        """Print each value using escape code ``96``."""
        return cls._wrap_colour('\033[96m', *args)

    @classmethod
    def darkcyan(cls, *args):
        """Print each value using escape code ``36``."""
        return cls._wrap_colour('\033[36m', *args)

    @classmethod
    def green(cls, *args):
        """Print each value using escape code ``92``."""
        return cls._wrap_colour('\033[92m', *args)

    @classmethod
    def purple(cls, *args):
        """Print each value using escape code ``95``."""
        return cls._wrap_colour('\033[95m', *args)

    @classmethod
    def red(cls, *args):
        """Print each value using escape code ``91``."""
        return cls._wrap_colour('\033[91m', *args)

    @classmethod
    def underline(cls, *args):
        """Print each value using escape code ``4``."""
        return cls._wrap_colour('\033[4m', *args)

    @classmethod
    def yellow(cls, *args):
        """Print each value using escape code ``93``."""
        return cls._wrap_colour('\033[93m', *args)
# mysql.py
import pymysql, re
from time import strftime
from core.conf import MySQL as M, Color as C
class Mysql:
    """Small helper around one PyMySQL connection bound to a single table.

    Values are always sent as query parameters, so the driver does the
    escaping and they are stored unmodified. Table and column names are
    interpolated into the SQL text verbatim and must come from trusted code
    (they must not contain ``%``).
    """

    def __init__(self, table):
        """Open a connection using the ``MySQL`` settings and remember
        ``table`` as the target of ``insert`` and ``update``."""
        # Keyword arguments only: PyMySQL 1.0+ rejects positional
        # connection parameters.
        self.db = pymysql.connect(
            host=M.host,
            user=M.user,
            password=M.password,
            database=M.db,
            charset=M.charset,
        )
        self.cursor = self.db.cursor()
        self.table = table

    def __del__(self):
        # __init__ may have failed before these attributes were assigned.
        for name in ('cursor', 'db'):
            resource = getattr(self, name, None)
            if resource is not None:
                resource.close()

    def fetchall(self, query, args=None):
        """Run ``query`` (optionally with parameters) and return all rows."""
        self.cursor.execute(query, args)
        return self.cursor.fetchall()

    def commit(self, sql, args=None):
        """Execute ``sql`` and commit; failures are printed, not raised.

        :param sql: statement text, with ``%s`` placeholders when ``args``
            is given.
        :param args: optional sequence of parameter values.
        """
        shown = (sql,) if args is None else (sql, args)
        try:
            self.cursor.execute(sql, args)
            self.db.commit()
        except Exception as error:
            # Best-effort by design: report the failure and carry on.
            C.red(error, *shown)
        else:
            C.yellow(*shown)

    @staticmethod
    def _insert_statement(table, row):
        """Build ``(sql, params)`` for inserting the non-None items of
        ``row`` into ``table``."""
        columns = [k for k, v in row.items() if v is not None]
        params = [row[k] for k in columns]
        placeholders = ','.join(['%s'] * len(columns))
        sql = 'INSERT %s (%s) VALUES (%s);' % (table, ','.join(columns), placeholders)
        return sql, params

    @staticmethod
    def _update_statement(table, dt_update, dt_condition):
        """Build ``(sql, params)`` for an UPDATE with AND-ed equality
        conditions; a ``None`` condition value becomes ``IS NULL``."""
        assignments = ','.join('%s=%%s' % k for k in dt_update)
        params = list(dt_update.values())
        predicates = []
        for k, v in dt_condition.items():
            if v is None:
                # ``col = NULL`` never matches in SQL.
                predicates.append('%s IS NULL' % k)
            else:
                predicates.append('%s=%%s' % k)
                params.append(v)
        sql = 'UPDATE %s SET %s WHERE %s;' % (table, assignments, ' AND '.join(predicates))
        return sql, params

    def insert(self, dt):
        """Insert ``dt`` (column -> value) as one row.

        Items whose value is ``None`` are skipped and a ``collect_date``
        column holding today's date (``YYYY-MM-DD``) is always added. The
        caller's dict is left untouched.
        """
        row = dict(dt)
        row['collect_date'] = strftime('%Y-%m-%d')
        sql, params = self._insert_statement(self.table, row)
        self.commit(sql, params)

    def update(self, dt_update, dt_condition):
        """Set the columns in ``dt_update`` on every row matching all
        equality conditions in ``dt_condition``.

        An empty mapping yields invalid SQL, which the server rejects and
        ``commit`` reports.
        """
        sql, params = self._update_statement(self.table, dt_update, dt_condition)
        self.commit(sql, params)