话不多说，直接上代码。
第一步：导入模块。
import requests
import re
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from lxml import html
# Bind the `etree` attribute of lxml's `html` module to a module-level name.
etree = html.etree
对网站进行了一波分析，发现中关村并没有使用动态数据加载，所以这里直接爬取页面即可。
def get_articles_list():
title_list = []
price_list = []
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
url = 'http://top.zol.com.cn/compositor/57/cell_phone.html'
page = requests