import requests
import time
import csv
from lxml import etree

# Fetch the Taobao movie "new releases" listing page and extract, for each movie,
# its name, basic info lines, and rating; the results are written to a local file.
for i in range(1):  # runs once; the loop only exists so 'continue' can skip a failed request
    final = []
    url = "https://dianying.taobao.com/showList.htm?spm=a1z21.3046609.w2.3.4d60112aCdaBZl&n_s=new"
    # A browser-like User-Agent makes the request less likely to be rejected
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
    html = requests.get(url, headers=headers)
    code = html.status_code
    print(code)
    if code == 200:
        selector = etree.HTML(html.text)
        # Each <div> matched here is one movie card on the listing page
        files = selector.xpath('/html/body/div[4]/div[1]/div[2]/div[1]/div')
        temp = []
        for file in files:
            # Movie title
            book_name = file.xpath("./a[1]/div[3]/span[1]/text()")[0]
            # Basic info lines (director, cast, genre, and so on)
            book_introduce_files = file.xpath('./a[1]/div[4]/div[2]/span')
            book_introduce_temp = []
            for book_introduce_file in book_introduce_files:
                book_introduce_temp.append(book_introduce_file.text)
            # The rating span is missing for movies that have no score yet
            if file.xpath('./a[1]/div[3]/span[2]/text()') != []:
                book_grades = file.xpath('./a[1]/div[3]/span[2]/text()')[0]
            else:
                book_grades = ''
            print("Movie name: {}\n".format(book_name))
            temp.append(book_name)
            print("Movie info:\n")
            for s in book_introduce_temp:
                print("    {}\n".format(s))
                temp.append(s)
            print("Movie rating: {}\n".format(book_grades))
            temp.append(book_grades)
            temp.append('\n')
            print('\n')
    else:
        continue
    final.append(temp)
    # Write everything gathered in this run as one CSV row
    with open('maoyan.text', 'w+', errors='ignore', newline='', encoding='utf-8') as f:
        f_txt = csv.writer(f)
        f_txt.writerows(final)
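As written, the script flattens all movies into a single CSV row, which is hard to load back into a table later. Below is a minimal alternative sketch, not part of the original script: the helper name write_movies_csv, the movies.csv filename, and the column names are all assumptions; it writes one row per movie plus a header.

import csv

def write_movies_csv(rows, path='movies.csv'):
    # Hypothetical output layout: one row per movie, with a header row first.
    # 'rows' is assumed to hold (name, introduce_text, grade) tuples collected
    # in the scraping loop above.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['name', 'introduce', 'grade'])  # assumed column names
        writer.writerows(rows)

# Example usage with made-up data:
# write_movies_csv([('SomeMovie', 'Director: X | Genre: Y', '9.1')])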
Taobao Movie Crawler in Practice
This article introduced a way to scrape data from the Taobao movie site with Python: requests fetches the page and lxml.etree parses it, XPath expressions extract each movie's name, basic info, and rating, and the results are saved to a local file. The crawler shows how setting a browser-like request header helps avoid basic anti-scraping checks and how XPath is used to pull out the desired data.
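The script imports time but never uses it; a common refinement is to add a request timeout and a short retry delay so a slow or flaky response does not crash the run. Below is a minimal sketch under stated assumptions: the fetch helper name, the retry count, the delay, and the 10-second timeout are all arbitrary choices, not part of the original article.

import time
import requests

def fetch(url, headers, retries=3, delay=2):
    # Try the request a few times, pausing briefly between attempts.
    for attempt in range(retries):
        try:
            resp = requests.get(url, headers=headers, timeout=10)
            if resp.status_code == 200:
                return resp.text
        except requests.RequestException:
            pass  # network error: fall through to the retry delay
        time.sleep(delay)
    return None  # caller decides what to do when every attempt fails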