# Download a file from the Web with the requests module.
# Import the required library.
import requests
# Send an HTTP GET request for the plain-text edition of the play.
# NOTE(review): no timeout is passed, so this call can wait indefinitely on a
# stalled server; consider adding one if that matters for your use.
res = requests.get('http://www.gutenberg.org/cache/epub/1112/pg1112.txt')
# Raise an exception if the server answered with an error status.
res.raise_for_status()
# Check the status code: prints True when the request succeeded (200 OK).
print(res.status_code == requests.codes.ok)
# Print the length of the decoded response text.
print(len(res.text))
# Print the first 250 characters of the text as a preview.
print(res.text[:250])
# Save the response to disk. The file is opened in binary mode because
# iter_content() yields bytes; the `with` block guarantees the file is closed
# even if a write fails part-way through.
with open('RomeoAndJuliet.txt', 'wb') as playFile:
    # iter_content() returns one piece of the content per iteration; the
    # argument is the maximum number of bytes in each piece.
    for chunk in res.iter_content(100000):
        playFile.write(chunk)
# requests fetches the raw data of the page, so if the URL is not a plain .txt file the HTML markup and other data are downloaded as well.
import requests
# Request a page that does not exist to demonstrate how a failed download
# is reported.
res = requests.get('http://inventwithpython.com/page_that_does_not_exist')
try:
    # raise_for_status() raises when the response carries an error status.
    res.raise_for_status()
except requests.exceptions.HTTPError as exc:
    # Catch only the error raise_for_status() produces, so unrelated bugs
    # are not silently swallowed; report the problem instead of crashing.
    print('There was a problem: %s' % (exc))