"""Small GET/POST helpers built on top of ``urllib.request``."""
from urllib import request, parse
from urllib.error import HTTPError, URLError

# Browser-like User-Agent sent when the caller supplies no headers of its own.
_DEFAULT_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
)


def get(url, headers=None):
    """Fetch *url* with a GET request and return the response body as bytes.

    Args:
        url: Address to request.
        headers: Optional dict of request headers; when ``None`` a default
            ``User-Agent`` header is sent.

    Returns:
        The raw response body, or ``b''`` if the request failed with an
        ``HTTPError``/``URLError`` (the error is printed).
    """
    return urlrequests(url, headers=headers)


def post(url, form, headers=None):
    """Send *form* to *url* as a URL-encoded POST and return the body as bytes.

    Args:
        url: Address to request.
        form: Mapping (or sequence of pairs) of form fields to submit.
        headers: Optional dict of request headers; when ``None`` a default
            ``User-Agent`` header is sent.

    Returns:
        The raw response body, or ``b''`` if the request failed with an
        ``HTTPError``/``URLError`` (the error is printed).
    """
    return urlrequests(url, form, headers=headers)


def urlrequests(url, form=None, headers=None):
    """Build a request for *url*, open it and return the response bytes.

    Args:
        url: Address to request.
        form: Form fields to URL-encode (UTF-8) and send as the request body.
            When ``None`` no body is attached, so HTTP URLs are fetched with
            GET; any other value (even an empty mapping) produces a POST.
        headers: Optional dict of request headers; when ``None`` only a
            browser-like ``User-Agent`` header is sent.

    Returns:
        The raw response body as ``bytes``. If opening the URL raises
        ``HTTPError`` or ``URLError`` the error is printed and ``b''`` is
        returned instead (best-effort behaviour; other exceptions propagate).
    """
    if headers is None:
        headers = {'User-Agent': _DEFAULT_USER_AGENT}

    if form is not None:
        # urlencode yields str; Request requires the body as bytes.
        form_bytes = parse.urlencode(form).encode('utf-8')
        req = request.Request(url, data=form_bytes, headers=headers)
    else:
        req = request.Request(url, headers=headers)

    html_bytes = b''
    try:
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req) as response:
            html_bytes = response.read()
    except (HTTPError, URLError) as e:
        print(e)
    return html_bytes


if __name__ == '__main__':
    # POST example:
    #     url = 'http://fanyi.baidu.com/sug'
    #     form = {'kw': '呵呵'}
    #     html_bytes = post(url, form=form)
    #     print(html_bytes.decode('utf-8'))
    url = 'http://www.baidu.com'
    html_bytes = get(url)
    print(html_bytes)
# 爬虫初级之 get、post 函数简单封装
# 最新推荐文章于 2024-03-14 18:40:12 发布