准备工作:
Python 3.9
开发工具:
PyCharm
爬取分析:
首先进入雪球网站,在浏览器开发者工具的网络(Network)面板中查找返回K线数据的接口
既然数据由接口直接以JSON形式返回,就无需解析页面HTML,直接请求该接口获取数据即可
实现代码:
"""
Author: cbk
Time: 2025/6/3 15:49
Project: examples-of-web-crawlers
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Request headers that make the call look like it comes from a desktop
# browser. The cookie value is a placeholder and must be replaced with a
# real one before running the script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "cookie": "xxxxxxx",
}

# Xueqiu K-line JSON endpoint; adjust the query parameters (symbol, begin,
# period, count, indicator) to the data you actually need.
url = (
    "https://stock.xueqiu.com/v5/stock/chart/kline.json"
    "?symbol=SH512890&begin=1749024046307&period=week&type=before&count=-284"
    "&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance"
)
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Name of the Excel workbook the script writes.
OUTPUT_FILE = '红利低波ETF_实时行情.xlsx'


def fetch_kline(url, headers, timeout=REQUEST_TIMEOUT):
    """Send the GET request for the K-line endpoint and return the response.

    Args:
        url: Full endpoint URL including the query string.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait before requests raises a timeout error.

    Returns:
        The ``requests.Response`` with its text encoding forced to UTF-8.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'  # decode the body as UTF-8
    return response


def kline_to_dataframe(payload):
    """Build a DataFrame from a decoded K-line JSON payload.

    The payload is expected to hold the column names under
    ``payload['data']['column']`` and the rows under
    ``payload['data']['item']``.

    Args:
        payload: The decoded JSON object returned by the endpoint.

    Returns:
        A DataFrame with one row per item. If a ``timestamp`` column is
        present, its millisecond epoch values are converted to datetimes.

    Raises:
        ValueError: If the payload lacks the ``data.column`` / ``data.item``
            structure (for example an error response).
    """
    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, dict) or 'column' not in data or 'item' not in data:
        raise ValueError("response JSON has no data.column / data.item section")

    df = pd.DataFrame(data['item'], columns=data['column'])
    if 'timestamp' in df.columns:
        # Millisecond epoch -> naive datetime (UTC-based).
        # NOTE(review): the exchange's local time zone is probably intended
        # for display; confirm before relying on the calendar date.
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    return df


def main():
    """Fetch the K-line data and export it to ``OUTPUT_FILE``.

    Uses the module-level ``url`` and ``headers`` defined above. Failures are
    reported on stdout instead of raising, matching the script's original
    "print and stop" behaviour for non-200 responses.
    """
    try:
        response = fetch_kline(url, headers)
    except requests.RequestException as exc:
        print(f"请求失败: {exc}")
        return

    # Anything other than 200 is treated as a failed request.
    if response.status_code != 200:
        print(f"请求失败,状态码: {response.status_code}")
        return

    try:
        # response.json() raises a ValueError subclass on a non-JSON body.
        df = kline_to_dataframe(response.json())
    except ValueError as exc:
        print(f"响应数据解析失败: {exc}")
        return

    # Write the table to an Excel workbook without the index column.
    df.to_excel(OUTPUT_FILE, index=False)
    print(f"数据已成功导出到 {OUTPUT_FILE} 文件中!")


if __name__ == "__main__":
    main()