用 Python 爬取豆瓣读书 Top250，并保存到本地。
分别用 requests + re、requests + XPath 两种方式完成。
1. requests + re
import requests
import re
def getHtmltext(url, timeout=10):
    """Fetch a page and return its decoded text, or None on failure.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The response body as text, decoded with the encoding detected from
        the content, or ``None`` when the request fails or the server
        answers with an error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so they also yield None.
        response.raise_for_status()
        # Prefer the encoding sniffed from the body over the header value.
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException:
        # Only request-level failures (connection, timeout, bad status,
        # malformed URL) are swallowed; KeyboardInterrupt and programming
        # errors propagate instead of being silently hidden.
        return None
def parserhtml(html):
    """Extract (title, info) pairs from a page's HTML with a regex.

    Each match starts at a ``; title="..."`` attribute and pairs its value
    with the text of the next ``<p class="pl">...</p>`` element that follows
    it (presumably the book's author/publisher line -- confirm against the
    actual page markup).

    Args:
        html: Page source as a string. ``None`` or an empty string is
            accepted, since getHtmltext returns ``None`` when a download
            fails.

    Returns:
        A list of ``(title, info)`` tuples in document order; empty when
        there is no input or nothing matches.
    """
    # A failed download yields None; re.findall would raise TypeError on it.
    if not html:
        return []
    # re.S lets ".*?" cross line breaks between the title and the <p> tag.
    pattern = r'; title="(.*?)".*?<p class="pl">(.*?)</p>'
    return re.findall(pattern, html, re.S)
def savexlsx(List):