先看源代码
import requests
from lxml import html
def spider(sn,book_list = []):
"""爬取京东图书数据"""
url = 'https://search.jd.com/Search?keyword={0}'.format(sn) # 使用format方法获取图书sn号
heard = {
'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
# 获取html文档
resp = requests.get(url,headers=heard) # 取出头部
resp.encoding = 'utf-8'
html_doc = resp.text
# 获取xpath对象
selector = html