# Scrape a JD.com (京东) product page.
# Straight to the code:
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Search JD.com for `keyword`, pull the name and price out of every product
# card on the first results page, and save them to an Excel workbook.
keyword = '手机'

# Base search endpoint. The query string is supplied through `params` so that
# requests percent-encodes the keyword (spaces, '&', '#', non-ASCII, ...)
# instead of it being spliced into the URL unescaped.
url = 'https://search.jd.com/Search'
params = {
    'keyword': keyword,
    'enc': 'utf-8',
    'wq': keyword,
    # Value copied verbatim from the original hard-coded URL.
    'pvid': 'f726ddcc0f0b4f4b9e9be11d39c76dac',
}
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, params=params, headers=headers, timeout=10)
print(response.status_code)
# Fail loudly on 4xx/5xx rather than parsing an error page and silently
# writing an empty spreadsheet.
response.raise_for_status()

soup = BeautifulSoup(response.text, 'lxml')
# CSS selector syntax: a bare tag name, ".class" for a class, "#id" for an id.
bodys = soup.select('div#J_goodsList div.gl-i-wrap')

# Column-oriented storage; deliberately NOT named `dict`, which would shadow
# the builtin.
goods = {
    'name': [],
    'price': [],
}
for body in bodys:
    name_tag = body.select_one('div.p-name a em')
    price_tag = body.select_one('div.p-price strong i')
    # Skip cards missing either field so that the two columns stay aligned
    # and a single odd card cannot abort the run with an IndexError.
    if name_tag is None or price_tag is None:
        continue
    goods['name'].append(name_tag.text)
    goods['price'].append(price_tag.text)

# One row per product, columns in a fixed order.
pd.DataFrame(goods, columns=['name', 'price']).to_excel('./京东商品信息.xlsx', index=False)