"""Fetch a Baidu Tieba listing page through a cookie-aware opener and print link titles.

The script POSTs a small form to ``url``, prints the decoded response body,
then prints one ``title<TAB>text`` line for every anchor matched by ``pattern``.
"""
import re
import urllib
import urllib.error
import urllib.parse
from http import cookiejar
from urllib import request

# 1. Target URL (the ``kw`` query value is already percent-encoded).
url = 'http://tieba.baidu.com/f?kw=%B6%CE%D7%D3&fr=ala0&tpl=5&dyTabStr=MCw2LDIsNCw1LDMsMSw4LDcsOQ%3D%3D'

# 2. Request headers: present a desktop browser User-Agent.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'}

# 2.1 Form fields sent as the request body (a dict, URL-encoded before sending).
values = {'username': '123456', 'password': '12345678'}

# 6. Regex applied to the page: for each <a rel="noopener" ...> anchor it captures
# the text following ``title=`` up to the next whitespace, and the anchor's inner text.
pattern = re.compile(r'<a rel="noopener".*?title=(.*?)\s.*?>(.*?)</a>')


def build_request(target, headers, form):
    """Build the request for *target* carrying *form* as a URL-encoded body.

    Args:
        target: URL to request.
        headers: Mapping of HTTP header names to values.
        form: Mapping of form field names to values.

    Returns:
        A ``urllib.request.Request``; because a body is attached, urllib
        sends it as a POST.
    """
    data = urllib.parse.urlencode(form).encode(encoding='UTF8')
    return request.Request(target, headers=headers, data=data)


def fetch_page(target, headers, form, timeout=10):
    """Download *target* with a cookie-aware opener and return the decoded body.

    Args:
        target: URL to request.
        headers: Mapping of HTTP header names to values.
        form: Mapping of form field names to values, sent as the request body.
        timeout: Seconds to wait on blocking socket operations before giving up.

    Returns:
        The response body as ``str``. It is decoded with the charset declared
        by the response (UTF-8 when none is declared or the declared one is
        unknown); undecodable bytes are replaced rather than raising.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        OSError: For socket-level failures while reading the body.
    """
    req = build_request(target, headers, form)

    # 4. Cookie handling: a jar to hold cookies, a processor wrapping the jar,
    # and an opener built around that processor.
    cookie = cookiejar.CookieJar()
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)

    # 5. Send the request; the ``with`` block closes the response even on error.
    with opener.open(req, timeout=timeout) as resp:
        raw = resp.read()
        charset = resp.headers.get_content_charset() or 'utf-8'

    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The server declared a codec Python does not know; fall back to UTF-8.
        return raw.decode('utf-8', errors='replace')


def extract_links(html):
    """Return the ``(title, text)`` tuples that ``pattern`` finds in *html*.

    Args:
        html: Page source to scan.

    Returns:
        A list of 2-tuples, one per match, in document order; empty when
        nothing matches.
    """
    return pattern.findall(html)


def main():
    """Fetch the page, print it, then print each extracted ``title<TAB>text`` pair."""
    try:
        content = fetch_page(url, header, values)
    except urllib.error.URLError as e:
        # HTTPError carries both a status code and a reason; a plain URLError
        # carries only a reason.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)
        return
    except OSError as e:
        # Failures while reading the body (e.g. a read timeout) are not
        # wrapped in URLError.
        print(e)
        return

    # Print the decoded response.
    print(content)

    # Print every match as "title<TAB>text".
    for title, text in extract_links(content):
        print(title + '\t' + text)


if __name__ == "__main__":
    main()