import requests
# Fetch the Baidu home page and print its HTML.
# A timeout bounds the wait so a stalled connection cannot hang the script.
r = requests.get('https://www.baidu.com/', timeout=10)
# Fail loudly on an HTTP error status instead of printing an error page.
r.raise_for_status()
# Force UTF-8 decoding of the body before reading r.text.
r.encoding = 'utf-8'
print(r.text)
第二节:Requests+XPath 爬取豆瓣电影
1、爬取单个元素信息
import requests
from lxml import etree
url = 'https://movie.douban.com/subject/1292052/'  # link to "The Shawshank Redemption"
# A timeout bounds the wait so a stalled connection cannot hang the script.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error status rather than parsing an error page.
response.raise_for_status()
r = response.text
s = etree.HTML(r)  # parse the HTML text into an element tree
# Select the text of the first <span> under the <h1> inside the element with id="content".
file = s.xpath('//*[@id="content"]/h1/span[1]/text()')
print(file)
2、爬取多个元素信息
import requests
from lxml import etree
url = 'https://movie.douban.com/subject/1292052/'
r = requests.get(url).text
s = etree.HTML(r)
movie = s.xpath('//*[@id="content"]/h1/span[1]/text()')
director = s.xpath(