工具:Python 3(另需安装第三方库 chardet:pip install chardet)
本文主要介绍用urllib去抓取网页。
下面以抓取百度首页为例:
from urllib import request
import chardet
if __name__ == "__main__":
    # Fetch the Baidu home page, then print the detected encoding, the HTTP
    # status code and the raw response body.
    # A browser-like User-Agent header is sent with the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}
    url = "https://www.baidu.com/"
    req = request.Request(url, headers=headers)

    # The context manager closes the HTTP response even if read() raises,
    # so the underlying connection is not leaked.
    with request.urlopen(req) as response:
        html = response.read()  # raw bytes, not yet decoded
        status = response.getcode()

    # Let chardet guess the encoding of the raw bytes.
    charset = chardet.detect(html)
    print("chardet去查看默认编码信息:%s" % charset)
    print("响应码:%s" % status)
    print(html)
结果