# https://zhuanlan.zhihu.com/p/365897117
# https://blog.youkuaiyun.com/weixin_50646402/article/details/130780623
# https://blog.youkuaiyun.com/jtj2002/article/details/131263607
# Patent application counts per firm (Stkcd) and year (Accper).
#
# Replaces a triple-nested loop that:
#   * compared the character AppliNo[4] with the integers 1 / 2, which is
#     never true for string application numbers (every count stayed 0);
#   * filtered by year on the whole dataset instead of the firm's rows, and
#     then never used that filtered frame when counting;
#   * iterated the raw (non-unique) Accper column, emitting one duplicate
#     output row per patent;
#   * grew `result` with pd.concat inside the loop (quadratic copying);
#   * carried a debug counter/break and per-row debug prints.
def _count_patents_by_firm_year(patents):
    """Return one row per (Stkcd, Accper) pair with patent-type counts.

    The character at index 4 of each ``AppliNo`` (after conversion to
    ``str``) is used as the patent-type code: ``'1'`` is counted in
    ``GPfaming`` and ``'2'`` in ``GPshiyong``. ``GPtotal`` is the sum of
    those two counts only, exactly as in the original loop.
    NOTE(review): presumably '1' = invention and '2' = utility model, going
    by the column names -- confirm against the application-number format.

    Args:
        patents: DataFrame with at least the columns ``Stkcd``, ``Accper``
            and ``AppliNo`` (one row per patent application).

    Returns:
        DataFrame with columns ``Stkcd``, ``Accper``, ``GPtotal``,
        ``GPfaming``, ``GPshiyong`` and a fresh 0..n-1 index. Groups appear
        in order of first occurrence in ``patents``.
    """
    # .str[4] yields NaN (not an IndexError) for values shorter than 5 chars,
    # so malformed application numbers are simply counted as neither type.
    type_code = patents['AppliNo'].astype(str).str[4]
    flags = pd.DataFrame({
        'Stkcd': patents['Stkcd'],
        'Accper': patents['Accper'],
        'GPfaming': (type_code == '1').astype(int),
        'GPshiyong': (type_code == '2').astype(int),
    })
    counts = flags.groupby(['Stkcd', 'Accper'], sort=False, as_index=False)[
        ['GPfaming', 'GPshiyong']
    ].sum()
    counts['GPtotal'] = counts['GPfaming'] + counts['GPshiyong']
    return counts[['Stkcd', 'Accper', 'GPtotal', 'GPfaming', 'GPshiyong']]


result = _count_patents_by_firm_year(jian_zhuanlidata)
print(result)
# for code in jian_zhaunlidata['Stkcd'].unique():
# format_jian_zhaunlidata = jian_zhaunlidata[jian_zhaunlidata['Stkcd'] == code]
# for nianfen in format_jian_zhaunlidata['Accper']:
# nianfen_format_jian_zhaunlidata = format_jian_zhaunlidata[format_jian_zhaunlidata['Accper'] == nianfen]
# nianfen_format_jian_zhaunlidata['GPtotal'] = len(nianfen_format_jian_zhaunlidata.index)
# nianfen_format_jian_zhaunlidata['GPfaming'] = len(nianfen_format_jian_zhaunlidata[nianfen_format_jian_zhaunlidata['AppliNo'] // 10000 % 10 == 1].index)
# nianfen_format_jian_zhaunlidata['GPshiyong'] = len(nianfen_format_jian_zhaunlidata[nianfen_format_jian_zhaunlidata['AppliNo'] // 10000 % 10 == 2].index)
# result = pd.concat([result, nianfen_format_jian_zhaunlidata], ignore_index=True)
# 输出结果
# print(result.head())
经纬度查询距离:
pip install cpca
from geopy.distance import geodesic

# Each point is a (latitude, longitude) tuple.
coord_1 = (39.917978, 116.396288)  # Tiananmen, Beijing
coord_2 = (31.230416, 121.473701)  # downtown Shanghai

# Measure the geodesic separation first, then read it out in kilometres.
separation = geodesic(coord_1, coord_2)
distance = separation.km

# Report the distance with two decimal places.
report = "距离为:{:.2f}千米".format(distance)
print(report)
pip install geopy
pip install chinese_province_city_area_mapper
# Option 1: pass a custom mapping straight to CPCATransformer
# (here the single entry maps "朝阳区" to "北京市").
# NOTE(review): neither CPCATransformer nor location_str is defined in the
# visible part of this file -- confirm both are imported/assigned elsewhere.
cpca = CPCATransformer({"朝阳区":"北京市"})
df = cpca.transform(location_str)
# Option 2: use the default address dictionary exposed as myumap.umap.
# This rebinds cpca and df, replacing the results of option 1.
from chinese_province_city_area_mapper import myumap
cpca = CPCATransformer(myumap.umap)
df = cpca.transform(location_str)
# Tour of the lookup tables exposed by SuperMap. The bare expressions below
# only display a value when run interactively (e.g. in a notebook cell).
from chinese_province_city_area_mapper.infrastructure import SuperMap
# Area-to-city mapping, a dict (key: area name, value: the city it belongs to); note that it contains duplicated area names
SuperMap.area_city_mapper
# List of duplicated area names; if an area name is in this list, its area_city_mapper entry is not reliable
SuperMap.rep_areas
# City-to-province mapping, a dict (key: city name, value: province name)
SuperMap.city_province_mapper
# Nationwide province/city/area coordinate table, a dict (key: "province,city,area", value: (latitude, longitude))
SuperMap.lat_lon_mapper
# Look up the latitude/longitude of Chaoyang District, Beijing
SuperMap.lat_lon_mapper.get("北京市,北京市,朝阳区")
插值法构建整个区间内的利率曲线:
from scipy.interpolate import CubicSpline
import numpy as np

# Build a monthly spot-rate curve (1M .. 600M) by cubic-spline interpolation
# through the quoted tenors.
tenors = ['1M', '3M', '6M', '9M', '1Y', '2Y', '3Y', '5Y', '7Y', '10Y', '15Y', '20Y', '30Y', '40Y', '50Y']
# ChinaBond spot yields (source: Eastmoney -> interest-rate trends -> bond database -> spot yield)
zeroRates = [1.7394,2.1815,2.2711,2.2586,2.2743,2.3635,2.4480,2.5514,2.7146,2.7103,2.8371,2.8882,3.0855,3.2349,3.2833]

# Convert every tenor label to a maturity in years.
tenors_in_years = []
for tenor in tenors:
    unit = tenor[-1]
    if unit == 'M':
        tenors_in_years.append(int(tenor[:-1]) / 12)
    elif unit == 'Y':
        tenors_in_years.append(int(tenor[:-1]))
    else:
        # Silently skipping a label would leave tenors_in_years shorter than
        # zeroRates and produce a confusing error inside CubicSpline.
        raise ValueError("unsupported tenor label: {!r}".format(tenor))

# Fit the spline through the quoted points.
cs = CubicSpline(tenors_in_years, zeroRates)

# Integer month grid 1..600 (50 years). Driving both the evaluation points
# and the labels from the same integer grid guarantees equal lengths; a
# float-step np.arange(1/12, 50+1/12, 1/12) can come out one element longer
# or shorter depending on rounding.
months = np.arange(1, 50 * 12 + 1)
spotRates = cs(months / 12)
sequence = [str(month) + "M" for month in months]

# Final table: one row per month with its interpolated yield.
df = pd.DataFrame({'term': sequence, 'ytm': spotRates})
df
-- Maintenance statements for ks_crawler1.k_cbt_daily_report.
-- Drop the date_time and bondCode columns from the table.
ALTER TABLE ks_crawler1.k_cbt_daily_report
DROP COLUMN date_time,
DROP COLUMN bondCode;
-- NOTE(review): the two statements below reference bondCode, which the ALTER
-- above removes; run top to bottom they would fail on the missing column.
-- Presumably these were executed separately or in a different order -- confirm.
-- Delete every row whose bondCode is not NULL.
DELETE FROM ks_crawler1.k_cbt_daily_report WHERE bondCode IS NOT NULL;
-- Count the rows whose bondCode is not NULL (a check on the DELETE above).
SELECT count(*) FROM ks_crawler1.k_cbt_daily_report WHERE bondCode IS NOT NULL;