获取一些基金的信息
1. fund_stack(max_page=26):最大页码 max_page 固定为 26 页,没有做成自动获取总页数——目的只是获取排名靠前的优秀基金,排名靠后的数据意义不大。
通过改变页码请求 http://fund.eastmoney.com/data/rankhandler.aspx,获取当前页的股票型基金数据,用正则提取并经 JSON 解析后写入数据库。
2. 遍历第 1 步获取到的基金 id 列表,逐个请求基金详情页(http://fund.eastmoney.com/<基金id>.html),用 xpath 解析后将基金概况写入数据库。
3.请求http://fundf10.eastmoney.com/FundArchivesDatas.aspx获取每个基金下面的股票持仓情况。
# -*- coding:utf-8 -*-
import re
import json
import time
import requests
from datetime import datetime
from conf.conn import Pymsql_conn
from lxml import etree
class EastMoney(object):
    """Scrape fund data from eastmoney.com and store it through ``Pymsql_conn``.

    Three entry points are meant to be run in order:

    * ``fund_stack``      - walk the ranking pages, upsert rows into
      ``fund_stock`` and collect the fund ids in ``self.fund_list``.
    * ``fund_profile``    - fetch the detail page of every collected fund id
      and upsert a row into ``fund_profile``.
    * ``stack_positions`` - for every row of ``fund_profile`` fetch the stock
      holdings and upsert them into ``stack_position``.

    NOTE(review): every SQL statement below is built by interpolating the
    ``repr`` of a Python tuple into the statement text. Scraped strings that
    contain quotes or backslashes can break the statement (or inject SQL).
    ``Pymsql_conn.insert_info`` is not visible from this file, so the calls
    could not be switched to parameterised queries here - confirm its
    signature and parameterise.
    """

    # Captures the JSON array that follows ``datas:`` in the ranking response.
    _RANK_DATA_RE = re.compile(r'{datas:(.*]),')
    # parse_fund_stack reads row indexes up to 20, so shorter rows are skipped.
    _RANK_MIN_FIELDS = 21
    # Seconds to wait between two attempts for the same fund detail page.
    _RETRY_DELAY = 1

    def __init__(self):
        self.pm = Pymsql_conn()
        self.url = 'http://fund.eastmoney.com/data/rankhandler.aspx'
        # No fixed "Host" header: these headers are shared by requests to
        # fund.eastmoney.com and fundf10.eastmoney.com, and requests derives
        # the correct Host from each URL on its own.
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Referer": "http://fund.eastmoney.com/data/fundranking.html",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        }
        self.params = {
            "op": "ph",
            "dt": "kf",
            "ft": "gp",
            "rs": "",
            "gs": "0",
            "sc": "zzf",
            "st": "desc",
            "sd": "2018-05-03",  # start date
            "ed": "2020-05-06",  # end date
            "qdii": "",
            "tabSubtype": ",,,,,",
            "pi": "2",  # page number (overwritten by parse_fund_stack)
            "pn": "50",  # rows per page
            "dx": "1",
            "v": "0.11302465026589648",
        }
        # Seed id; parse_fund_stack appends every fund id it sees.
        self.fund_list = ["004856"]

    @staticmethod
    def _extract_rank_rows(json_str):
        """Return the ranking rows of *json_str* as lists of string fields.

        An empty list is returned when the ``datas`` array is missing or is
        not valid JSON, so callers never index into a failed match.
        """
        match = EastMoney._RANK_DATA_RE.search(json_str or '')
        if match is None:
            return []
        try:
            rows = json.loads(match.group(1))
        except ValueError:
            return []
        return [row.split(',') for row in rows if isinstance(row, str)]

    @staticmethod
    def _first(node, path, label):
        """Return the first result of ``node.xpath(path)`` or ``''`` if none.

        *label* only names the field in the log line printed on a miss.
        """
        found = node.xpath(path)
        if not found:
            print(f'{label}: xpath matched nothing')
            return ''
        return found[0]

    @staticmethod
    def _parse_position_row(tr):
        """Extract ``(stack_id, stack_name, net_worth, stack_num, stack_value)``.

        The value columns are looked up at td[7..9] first and at td[5..7] when
        that layout is incomplete. ``None`` is returned when neither fits.
        """
        try:
            stack_id = tr.xpath('./td[2]/a/text()')[0]  # stock id
            stack_name = tr.xpath('./td[3]/a/text()')[0]  # stock name
        except IndexError:
            return None
        # (share of net value, shares held, market value) column positions.
        for columns in ((7, 8, 9), (5, 6, 7)):
            try:
                values = tuple(tr.xpath(f'./td[{col}]/text()')[0] for col in columns)
            except IndexError:
                continue
            return (stack_id, stack_name) + values
        return None

    # Fetch and store one ranking page.
    def parse_fund_stack(self, num):
        """Fetch ranking page *num* and upsert its rows into ``fund_stock``.

        Every fund id on the page is added to ``self.fund_list`` (once).
        Rows with fewer fields than expected are logged and skipped; a
        response without a ``datas`` array stores nothing.

        :param num: page number sent as the ``pi`` query parameter.
        :raises requests.RequestException: if the HTTP request fails.
        """
        self.params['pi'] = num
        json_str = self.get_html(url=self.url, parse=self.params)
        print(json_str)
        for fields in self._extract_rank_rows(json_str):
            if len(fields) < self._RANK_MIN_FIELDS:
                print(f'skipping malformed ranking row: {fields}')
                continue
            fund_id = fields[0]
            if fund_id not in self.fund_list:
                self.fund_list.append(fund_id)
            createtime = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
            # Column order of the insert below, by row index:
            #   0 fund code, 1 fund name, 2 abbreviated name, 3 latest date,
            #   4 unit NAV, 5 accumulated NAV, 6 daily / 7 weekly / 8 monthly /
            #   9 three-month / 10 six-month / 11 one-year / 12 two-year /
            #   13 three-year / 14 year-to-date / 15 since-inception growth,
            #   18 custom (query range) value, 20 service fee.
            val = tuple(fields[:16]) + (fields[18], fields[20], createtime)
            print(*val[:-1])
            try:
                # Continuation lines belong to the SQL literal and are kept
                # flush-left so the statement text stays unchanged.
                sql = f"""insert into fund_stock(fund_id, fund_name, fund_name_e, fund_date, nva, acc_nva,
daily_growth_rate,weekly_growth_rate,monthly_growth_rate, three_monthly_growth_rate,
six_monthly_growth_rate,one_year_growth_rate,two_year_growth_rate, three_year_growth_rate,
this_year_growth_rate,established_growth_rate, custom,service_fee,createtime) values {val} on
duplicate key update fund_id=values(fund_id), fund_name=values(fund_name), fund_name_e=
values(fund_name_e),fund_date=values(fund_date), nva=values(nva), acc_nva=values(acc_nva),
daily_growth_rate=values(daily_growth_rate), weekly_growth_rate=values(weekly_growth_rate),
monthly_growth_rate=values(monthly_growth_rate),three_monthly_growth_rate=values
(three_monthly_growth_rate),six_monthly_growth_rate=values(six_monthly_growth_rate),
one_year_growth_rate=values(one_year_growth_rate), two_year_growth_rate=values
(two_year_growth_rate),three_year_growth_rate=values(three_year_growth_rate),
this_year_growth_rate=values(this_year_growth_rate),established_growth_rate=values
(established_growth_rate), custom=values(custom), service_fee=values(service_fee),
createtime=values(createtime),changetime=values(createtime)"""
                self.pm.insert_info(sql=sql)
            except Exception as e:
                # Best effort: the DB helper's exception types are not visible
                # here, so log and continue with the next row.
                print(e)
        # One commit per page.
        self.pm.commit_info()

    # Fetch every ranking page.
    def fund_stack(self, max_page):
        """Process ranking pages ``1..max_page`` and return ``True``.

        :param max_page: last page number to fetch (inclusive).
        """
        for page in range(1, max_page + 1):
            print(page)
            self.parse_fund_stack(num=page)
        return True

    # Perform one HTTP GET.
    def get_html(self, url, parse=None, timeout=10):
        """Request *url* and return the response body decoded as UTF-8.

        :param url: target URL.
        :param parse: optional dict of query-string parameters.
        :param timeout: seconds before the request is abandoned, so a stalled
            connection cannot hang the crawl.
        :return: the response text.
        :raises requests.RequestException: on connection errors or timeout.
        """
        response = requests.get(url=url, headers=self.headers, params=parse, timeout=timeout)
        # Decode the body as UTF-8 regardless of the charset guessed from the
        # response headers.
        response.encoding = 'utf-8'
        return response.text

    def _fetch_and_store_profile(self, fund_id, url):
        """Fetch one fund detail page and upsert it; return ``True`` on success."""
        try:
            tree = etree.HTML(self.get_html(url=url))
        except (requests.RequestException, etree.LxmlError, ValueError) as e:
            print(e)
            return False
        if tree is None:
            return False

        fund_name = self._first(
            tree, '//*[@id="body"]//div[@class="fundDetail-tit"]/div/text()', 'fund_name')
        if not fund_name:
            # Without a title this is not a usable profile page; storing it
            # would overwrite an existing row with blanks.
            return False
        fund_form = self._first(
            tree, '//*[@id="body"]//div[@class="infoOfFund"]/table//tr[1]/td[1]/a/text()', 'fund_form')
        establishment_time = self._first(
            tree, '//*[@id="body"]/div[12]/div/div/div[3]/div[1]/div[2]/table//tr[2]/td[1]/text()',
            'establishment_time').replace(':', '')
        assets = self._first(
            tree, '//*[@id="body"]/div[12]/div/div/div[3]/div[1]/div[2]/table//tr[1]/td[2]/text()',
            'assets').replace(':', '')
        fund_manager = self._first(
            tree, '//*[@id="body"]/div[12]/div/div/div[3]/div[1]/div[2]/table//tr[1]/td[3]/a/text()',
            'fund_manager')
        # Both ids are the last path segment of a link, minus ".html".
        fund_manager_id = self._first(
            tree, '//*[@id="body"]//td[@class="td02"]/a/@href',
            'fund_manager_id').split('/')[-1].replace('.html', '')
        fund_company_id = self._first(
            tree, '//*[@id="body"]//div[@class="infoOfFund"]//table//tr[2]/td[2]/a/@href',
            'fund_company_id').split('/')[-1].replace('.html', '')
        fund_company = self._first(
            tree, '//*[@id="body"]/div[12]/div/div/div[3]/div[1]/div[2]/table//tr[2]/td[2]/a/text()',
            'fund_company')

        createtime = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
        val = (fund_name, fund_id, fund_form, establishment_time, assets, fund_manager, fund_manager_id,
               fund_company_id,
               fund_company, createtime)
        print(*val)
        try:
            # Continuation lines belong to the SQL literal and are kept
            # flush-left so the statement text stays unchanged.
            sql = f"""insert into fund_profile(fund_name, fund_id, fund_form, establishment_time, assets, fund_manager, fund_manager_id, fund_company_id,
fund_company, createtime) values {val} on
duplicate key update fund_id=values(fund_id), fund_name=values(fund_name), fund_form=
values(fund_form),establishment_time=values(establishment_time), assets=values(assets), fund_manager=values(fund_manager),
fund_manager_id=values(fund_manager_id), fund_company_id=values(fund_company_id),
fund_company=values(fund_company),createtime=values
(createtime),changetime=values(createtime)"""
            self.pm.insert_info(sql=sql)
        except Exception as e:
            # The DB helper's exception types are not visible here; report the
            # failure so the caller can retry.
            print(e)
            return False
        return True

    # Fetch the detail page of every collected fund.
    def fund_profile(self, max_retries=3):
        """Fetch and store the profile of every id in ``self.fund_list``.

        Fields whose xpath matches nothing are stored as empty strings, so a
        miss can neither raise ``NameError`` nor reuse the previous fund's
        value. Each fund is attempted at most *max_retries* times instead of
        being retried without limit.

        :param max_retries: maximum number of fetch/store attempts per fund.
        :return: None
        """
        print(self.fund_list)
        for fund_id in self.fund_list:
            fund_profile_url = f'http://fund.eastmoney.com/{fund_id}.html'
            print(fund_profile_url)
            for attempt in range(1, max_retries + 1):
                if self._fetch_and_store_profile(fund_id, fund_profile_url):
                    break
                print(f'fund {fund_id}: attempt {attempt}/{max_retries} failed')
                if attempt < max_retries:
                    time.sleep(self._RETRY_DELAY)
            # Commit per fund so a later failure cannot discard earlier rows.
            self.pm.commit_info()

    # Fetch the stock holdings of every stored fund (refresh at quarter end).
    def stack_positions(self, year=2020):
        """Fetch and store the stock holdings of every fund in ``fund_profile``.

        Funds whose page cannot be fetched or parsed, and table rows that fit
        neither known column layout, are logged and skipped.

        :param year: value of the ``year`` query parameter (default 2020).
        :return: None
        """
        fund_profile_url = 'http://fundf10.eastmoney.com/FundArchivesDatas.aspx'
        params = {
            "type": "jjcc",
            "code": "",
            "topline": 20,
            "year": year,
            "month": "",
        }
        sql = 'select * from fund_profile'
        fund_list = self.pm.check_info(sql=sql)
        print(fund_list)
        # presumably check_info returns a list of dict rows - verify against
        # conf.conn; ``or ()`` tolerates a None/empty result.
        for fund_dict in fund_list or ():
            fund_id = fund_dict.get('fund_id')
            params['code'] = fund_id
            print(params)
            try:
                tree = etree.HTML(self.get_html(url=fund_profile_url, parse=params))
            except (requests.RequestException, etree.LxmlError, ValueError) as e:
                print(e)
                continue
            if tree is None:
                continue
            tr_list = tree.xpath('/html/body//table/tbody/tr')
            if not tr_list:
                continue
            fund_name = self._first(tree, '/html/body/div[1]/div/h4/label[1]/a/text()', 'fund_name')
            print(tr_list)
            for tr in tr_list:
                row = self._parse_position_row(tr)
                if row is None:
                    print(f'fund {fund_id}: skipping holdings row with an unknown layout')
                    continue
                createtime = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
                # (stack_id, stack_name, net_worth, stack_num, stack_value,
                #  fund_id, fund_name, createtime)
                val = row + (fund_id, fund_name, createtime)
                print(val)
                try:
                    sql = f'insert into stack_position(stack_id,stack_name,net_worth,stack_num,stack_value,fund_id,' \
                          f'fund_name,createtime) values {val} on duplicate key update stack_id=values(stack_id),stack_name=' \
                          f'values(stack_name),net_worth=values(net_worth),stack_num=values(stack_num),stack_value=values' \
                          f'(stack_value),fund_id=values(fund_id),fund_name=values(fund_name),changetime=values(createtime)'
                    self.pm.insert_info(sql=sql)
                except Exception as e:
                    # Best effort: log and continue with the next row.
                    print(e)
            # Commit per fund so a later failure cannot discard earlier rows.
            self.pm.commit_info()
if __name__ == '__main__':
    # Wall-clock start, used to report the total run time at the end.
    started_at = time.time()
    # Build the scraper (this also creates the database helper).
    crawler = EastMoney()
    # Step 1: walk the ranking pages and refresh the fund_stock table.
    ranking_done = crawler.fund_stack(max_page=26)
    # Step 2: fetch the detail page of every fund id collected in step 1.
    crawler.fund_profile()
    # Step 3: fetch the stock holdings of every stored fund.
    crawler.stack_positions()
    # Elapsed seconds for the whole run.
    print(time.time() - started_at)
该代码实现从东方财富网抓取基金排名、基金详情和基金持仓数据,并存储到数据库中。首先,通过`fund_stack`函数获取基金排名数据,然后遍历基金ID,使用`fund_profile`获取基金详情,最后通过`stack_positions`获取基金持仓信息。整个过程中,使用了requests库进行HTTP请求,lxml库解析HTML,Pymsql_conn模块操作数据库。
515

被折叠的 条评论
为什么被折叠?



