Index For TechShare

本文探讨了JavaScript中数组去重的三种常见方法,并详细解释了如何实现它们。

  • 如果 JS 缓存比较严重,可以给引用的 JS 文件加上一个版本参数,例如:
    <script src="app.js?v=1"></script>
  • .inc是什么文件    点击打开链接

# NOTE: import order is kept exactly as in the original paste. The two star
# imports can shadow one another, so reordering them could change which names
# win; `defaultdict` is unused here but kept because other code may rely on it.
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity
import warnings
from Return import *
from datetime import *
from pandas.tseries.offsets import BMonthEnd

warnings.filterwarnings('ignore')

# Substrings that mark a ticker as belonging to an A-share exchange.
_A_SHARE_MARKETS = ('SZ', 'SH', 'BJ')


class FactorBase:
    """Base class for factors; every factor class should inherit from it."""

    def __init__(self, t1, t2, t4):
        """Copy the input tables and run the shared preprocessing.

        Parameters:
            t1: DataFrame, patent detail data.
            t2: DataFrame, patent change-history records.
            t4: DataFrame, patent holding records.
        """
        self.t1 = t1.copy()
        self.t2 = t2.copy()
        self.t4 = t4.copy()
        self._preprocess_data()

    def _extract_ipc_list(self, ipc_str):
        """Return the distinct 3-character IPC prefixes found in ``ipc_str``.

        ``ipc_str`` is expected to be a comma-separated string, optionally
        wrapped in list-literal punctuation (``[``, ``]``, ``'``). Anything
        that is not a string (NaN, None, lists, numbers) yields ``[]``.
        The order of the returned list is unspecified (it comes from a set).
        """
        # Only strings can be parsed; this guard replaces the former bare
        # ``except:`` that silently swallowed every error.
        if not isinstance(ipc_str, str):
            return []
        cleaned = ipc_str.replace("'", "").replace("[", "").replace("]", "")
        codes = (code.strip() for code in cleaned.split(","))
        return list({code[:3] for code in codes if len(code) >= 3})

    def _preprocess_data(self):
        """Shared preprocessing: IPC lists, date columns and A-share tickers."""
        # Only derive the column when t2 does not already carry it.
        if 'ipc_secondary' not in self.t2.columns:
            self.t2['ipc_secondary'] = self.t2['ipc_code'].apply(self._extract_ipc_list)

        # Make sure the holding-period columns of t4 are real datetimes.
        for column in ('start_date', 'end_date'):
            if (column in self.t4.columns
                    and not pd.api.types.is_datetime64_any_dtype(self.t4[column])):
                self.t4[column] = pd.to_datetime(self.t4[column])

        # Collect every A-share ticker; one cell may hold several tickers
        # separated by '/'.
        self.a_share_tickers = set()
        if 'ticker' in self.t4.columns:
            for tickers in self.t4['ticker'].dropna():
                if not isinstance(tickers, str):
                    continue
                for ticker in tickers.split('/'):
                    if any(market in ticker for market in _A_SHARE_MARKETS):
                        self.a_share_tickers.add(ticker)

        print(f"获取到 {len(self.a_share_tickers)} 个A股股票代码")

    def calculate_factor(self, month_end, returns_data):
        """Compute factor values; must be implemented by subclasses."""
        raise NotImplementedError("子类必须实现calculate_factor方法")


# Technology-linked momentum factor
class TechMomentum(FactorBase):
    """Technology momentum factor.

    Each stock's factor value is the similarity-weighted average return of
    the other stocks, where similarity is the cosine similarity between
    per-company counts of granted patents by IPC prefix.
    """

    # Supported look-back labels -> number of years; unknown labels fall
    # back to one year.
    _LOOKBACK_YEARS = {'1Y': 1, '3Y': 3, '5Y': 5}

    def __init__(self, t1, t2, t4, lookback_period='1Y'):
        """Initialise the factor.

        Parameters:
            t1, t2, t4: DataFrames, see ``FactorBase.__init__``.
            lookback_period: '1Y', '3Y' or '5Y'; anything else is treated
                as one year.
        """
        super().__init__(t1, t2, t4)
        self.lookback_period = lookback_period
        self._setup_ipc_categories()

    def _setup_ipc_categories(self):
        """Build the sorted IPC vocabulary and its code -> index mapping."""
        self.all_ipc_codes = set()
        for ipc_list in self.t2['ipc_secondary']:
            if isinstance(ipc_list, list):
                self.all_ipc_codes.update(ipc_list)

        self.all_ipc_codes = sorted(self.all_ipc_codes)
        self.ipc_to_idx = {ipc: idx for idx, ipc in enumerate(self.all_ipc_codes)}
        print(f"IPC二级分类数量: {len(self.all_ipc_codes)}")

    def _get_tech_vectors(self, month_end):
        """Return ``{ticker: vector}`` of IPC counts for the look-back window.

        The window is ``[month_end - lookback, month_end]``. Tickers without
        any qualifying patent get an all-zero vector.
        """
        lookback_years = self._LOOKBACK_YEARS.get(self.lookback_period, 1)
        month_start = month_end - pd.DateOffset(years=lookback_years)

        # Holdings where the owner id equals the recognised id (i.e. held by
        # the company itself) and whose holding period overlaps the window.
        holdings = self.t4
        valid_holdings = holdings[
            (holdings['oaid'] == holdings['recog_oa_id'])
            & (holdings['start_date'] <= month_end)
            & (holdings['end_date'].isna() | (holdings['end_date'] >= month_start))
        ].copy()

        # Keep A-share companies only.
        valid_holdings = valid_holdings[valid_holdings['ticker'].apply(
            lambda x: any(market in str(x) for market in _A_SHARE_MARKETS)
            if pd.notna(x) else False
        )]

        # The date parsing and window filter do not depend on the ticker, so
        # they are done once here instead of twice per ticker in the loop.
        grant_dates = pd.to_datetime(self.t2['grant_date'])
        granted_in_window = self.t2[
            (grant_dates >= month_start) & (grant_dates <= month_end)
        ]

        n_codes = len(self.all_ipc_codes)
        tech_vectors = {}
        for ticker in self.a_share_tickers:
            # regex=False: tickers contain '.', which would otherwise act as
            # a regex wildcard and could match unrelated tickers.
            company_patents = valid_holdings[
                valid_holdings['ticker'].str.contains(ticker, na=False, regex=False)
            ]
            patent_ids = company_patents['patent_id'].unique()

            tech_vector = np.zeros(n_codes)
            if len(patent_ids) > 0:
                patent_ipcs = granted_in_window[
                    granted_in_window['patent_id'].isin(patent_ids)
                ]
                # Count one occurrence per (patent record, IPC prefix).
                for ipc_list in patent_ipcs['ipc_secondary']:
                    if not isinstance(ipc_list, list):
                        continue
                    for ipc in ipc_list:
                        if ipc in self.ipc_to_idx:
                            tech_vector[self.ipc_to_idx[ipc]] += 1

            tech_vectors[ticker] = tech_vector

        return tech_vectors

    def calculate_factor(self, month_end, returns_data):
        """Compute the technology momentum factor as of ``month_end``.

        Parameters:
            month_end: Timestamp used both as the end of the patent window
                and as the key into ``returns_data``.
            returns_data: dict ``{month_end: {ticker: return}}``.

        Returns:
            dict ``{ticker: factor_value}``; empty when fewer than two
            companies have a non-zero technology vector.
        """
        print(f"计算 {month_end.strftime('%Y-%m')} 的科技动量因子...")

        tech_vectors = self._get_tech_vectors(month_end)
        if not tech_vectors:
            print("没有找到技术向量数据")
            return {}

        # All-zero vectors carry no information and have undefined direction.
        valid_tech_vectors = {k: v for k, v in tech_vectors.items() if np.any(v > 0)}
        if len(valid_tech_vectors) < 2:
            print(f"有效公司数量不足 ({len(valid_tech_vectors)})")
            return {}

        tickers = list(valid_tech_vectors.keys())
        tech_matrix = np.array([valid_tech_vectors[code] for code in tickers])

        similarity_matrix = cosine_similarity(tech_matrix)

        returns = returns_data.get(month_end, {})

        tech_momentum = {}
        for i, ticker in enumerate(tickers):
            numerator = 0
            denominator = 0
            for j, peer in enumerate(tickers):
                weight = similarity_matrix[i, j]
                if i != j and weight > 0:
                    # NOTE(review): a peer without a return contributes 0 to
                    # the numerator but still adds to the denominator, which
                    # pulls the factor towards 0 — confirm this is intended.
                    numerator += weight * returns.get(peer, 0)
                    denominator += weight

            tech_momentum[ticker] = numerator / denominator if denominator > 0 else 0

        print(f" -> 成功计算 {len(tech_momentum)} 只股票的因子值")
        return tech_momentum


class ReturnCalculator:
    """Thin wrapper around the project's monthly-return computation."""

    def __init__(self, adjf_path, pv_path):
        """Create the loaders (from the ``Return`` module) for both paths."""
        self.adjf_loader = AdjFactorLoader(adjf_path)
        self.pv_loader = PriceLoader(pv_path)
        self.return_calculator = Month_return(self.adjf_loader, self.pv_loader)

    def get_stock_returns(self, month_end):
        """Return the returns for the month ending at ``month_end``.

        Returns an empty dict when the month has fewer than two business
        days in range, when the calculator yields nothing, or when it raises.
        """
        month_start = month_end.replace(day=1)
        # Bug fix: the original used freq='BM' (business month END), which
        # yields at most one date per month, so the "< 2" guard below always
        # fired and every month was reported as "交易日不足". Business-day
        # frequency gives the days inside the month instead.
        # NOTE(review): 'B' only skips weekends, not exchange holidays, and
        # the exact input expected by Month_return.Return is not visible
        # here — confirm against the Return module.
        trading_days = pd.date_range(month_start, month_end, freq='B')

        if len(trading_days) < 2:
            print(f" {month_end.strftime('%Y-%m')}: 交易日不足")
            return {}

        try:
            returns = self.return_calculator.Return(trading_days)
        except Exception as e:
            # Best effort: one failing month must not abort the whole run.
            print(f" {month_end.strftime('%Y-%m')}: 计算失败 - {e}")
            return {}

        if returns:
            print(f" {month_end.strftime('%Y-%m')}: 获取 {len(returns)} 只股票收益率")
            return returns

        print(f" {month_end.strftime('%Y-%m')}: 无收益率数据")
        return {}

    def get_multiple_months_returns(self, month_ends):
        """Return ``{month_end: returns}`` for the months that produced data."""
        returns_data = {}
        print("开始获取收益率数据...")

        for i, month_end in enumerate(month_ends):
            print(f"[{i+1}/{len(month_ends)}] ", end="")
            returns = self.get_stock_returns(month_end)
            if returns:
                returns_data[month_end] = returns

        print(f"收益率数据获取完成,共 {len(returns_data)} 个月有效数据")
        return returns_data


class FactorProcessor:
    """Combines registered factors with realised returns."""

    def __init__(self, t1, t2, t4, adjf_path, pv_path):
        """Store copies of the tables (empty frames for ``None``) and paths."""
        # Work on copies so the caller's frames are never modified.
        self.t1 = t1.copy() if t1 is not None else pd.DataFrame()
        self.t2 = t2.copy() if t2 is not None else pd.DataFrame()
        self.t4 = t4.copy() if t4 is not None else pd.DataFrame()
        self.adjf_path = adjf_path
        self.pv_path = pv_path
        self.factors = {}
        self.returns_calculator = ReturnCalculator(adjf_path, pv_path)

    @staticmethod
    def _previous_month_end(month_end):
        """Return the last calendar day of the month before ``month_end``."""
        return month_end - pd.offsets.MonthBegin(1) - pd.offsets.Day(1)

    def register_factor(self, factor_name, factor_class, **kwargs):
        """Instantiate ``factor_class`` and store it under ``factor_name``.

        Construction errors are reported on stdout and the factor is simply
        not registered.
        """
        try:
            factor_instance = factor_class(self.t1, self.t2, self.t4, **kwargs)
            self.factors[factor_name] = factor_instance
            print(f"成功注册因子: {factor_name}")
        except Exception as e:
            print(f"注册因子 {factor_name} 失败: {e}")

    def process_factors(self, month_ends, factor_names=None):
        """Compute factor values and join them with next-month returns.

        Parameters:
            month_ends: iterable of Timestamps, the months in which returns
                are realised.
            factor_names: names to process; defaults to all registered ones.

        Returns:
            DataFrame with columns ``date``, ``ticker``, ``factor_value``,
            ``factor_name`` and ``return``; empty when nothing is available.
        """
        if factor_names is None:
            factor_names = list(self.factors.keys())

        if not factor_names:
            print("没有注册任何因子")
            return pd.DataFrame()

        # Returns are needed for each requested month and the month before.
        all_returns_needed = set(month_ends)
        for month_end in month_ends:
            all_returns_needed.add(self._previous_month_end(month_end))

        # Sorted so the progress output is chronological and deterministic.
        returns_data = self.returns_calculator.get_multiple_months_returns(
            sorted(all_returns_needed)
        )

        if not returns_data:
            print("没有获取到有效的收益率数据")
            return pd.DataFrame()

        all_factor_data = []
        for factor_name in factor_names:
            if factor_name not in self.factors:
                print(f"警告: 因子 {factor_name} 未注册")
                continue

            print(f"\n开始计算因子: {factor_name}")
            factor_instance = self.factors[factor_name]

            for i, month_end in enumerate(month_ends):
                print(f"[{i+1}/{len(month_ends)}] 处理 {month_end.strftime('%Y-%m')}...")

                previous_month_end = self._previous_month_end(month_end)

                try:
                    # The factor is computed from the previous month's data.
                    factor_values = factor_instance.calculate_factor(
                        previous_month_end, returns_data
                    )
                except Exception as e:
                    print(f"计算 {factor_name} 在 {previous_month_end} 失败: {e}")
                    continue

                if factor_values:
                    for ticker, value in factor_values.items():
                        all_factor_data.append({
                            'date': month_end,        # month in which the return is realised
                            'ticker': ticker,
                            'factor_value': value,    # computed at the previous month end
                            'factor_name': factor_name,
                        })

        if not all_factor_data:
            print("没有生成任何因子数据")
            return pd.DataFrame()

        factor_df = pd.DataFrame(all_factor_data)

        # Returns of the current (realisation) month.
        return_data_list = []
        for month_end in month_ends:
            returns = returns_data.get(month_end, {})
            for ticker, ret in returns.items():
                return_data_list.append({
                    'date': month_end,
                    'ticker': ticker,
                    'return': ret,
                })

        return_df = pd.DataFrame(return_data_list)

        frdata = pd.merge(factor_df, return_df, on=['date', 'ticker'], how='inner')

        print(f"\n数据处理完成,共 {len(frdata)} 条有效记录")
        print("数据格式:")
        print("- date: 收益实现的月份")
        print("- ticker: 股票代码")
        print("- factor_value: 上月末计算的因子值")
        print("- return: 当月收益率")

        return frdata


def main():
    """Example run: one factor over a short test period."""
    # Load the data (make sure the file paths are correct).
    try:
        t1 = pd.read_csv('/home/liao/zhuanlidata/1-1.秩鼎_专利数据_专利详细数据_to202407.csv')
        t2 = pd.read_csv('/home/liao/zhuanlidata/1-2.秩鼎_专利数据_专利变更历史记录_to202407.csv')
        t4 = pd.read_csv('/home/liao/zhuanlidata/1-4.秩鼎_专利数据_专利持有情况_全部子公司_to202407.csv')
        print("数据读取成功")
    except Exception as e:
        print(f"数据读取失败: {e}")
        # Fall back to empty frames so the rest of the pipeline can be tried.
        t1, t2, t4 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    processor = FactorProcessor(t1, t2, t4,
                                adjf_path='/tank0/byb/data/other/adjf_rust',
                                pv_path='/tank0/byb/data/mikuang/pv/')

    processor.register_factor('Tech_Momentum_1Y', TechMomentum, lookback_period='1Y')

    # Short period for a first test. The BMonthEnd offset object replaces
    # the 'BM' alias string, which is deprecated in recent pandas.
    test_month_ends = pd.date_range('2023-01-31', '2023-02-28', freq=BMonthEnd())

    return processor.process_factors(test_month_ends)


# Usage example
if __name__ == "__main__":
    frdata = main()

# # Usage example (full run, kept from the original for reference)
# if __name__ == "__main__":
#     # Load the data
#     t1 = pd.read_csv('/path/to/t1.csv')
#     t2 = pd.read_csv('/path/to/t2.csv')
#     t4 = pd.read_csv('/path/to/t4.csv')
#
#     # Initialise the factor processor
#     processor = FactorProcessor(t1, t2, t4,
#                                 adjf_path='/tank0/byb/data/other/adjf_rust',
#                                 pv_path='/tank0/byb/data/mikuang/pv/')
#
#     # Register the factors
#     processor.register_factor('Tech_Momentum_1Y', TechMomentum, lookback_period='1Y')
#     processor.register_factor('Tech_Momentum_3Y', TechMomentum, lookback_period='3Y')
#     processor.register_factor('Tech_Momentum_5Y', TechMomentum, lookback_period='5Y')
#
#     # Build the month-end series
#     month_ends = pd.date_range('2015-01-31', '2023-12-31', freq='M')
#
#     # Process the factor data
#     frdata = processor.process_factors(month_ends)
#
#     # Save the result
#     if not frdata.empty:
#         frdata.to_csv('factor_returns_data.csv', index=False)
#         print(f"数据已保存,共 {len(frdata)} 条记录")
#
#         # Can be fed into a backtest
#         backtester = Backtrade(frdata[frdata['factor_name'] == 'Tech_Momentum_1Y'], 'Tech_Momentum_1Y')
#         metrics = backtester.run_backtest()
#     else:
#         print("没有生成有效数据")

# Run log reported in the original post (produced by the freq='BM' bug that
# is fixed in ReturnCalculator.get_stock_returns above):
#   修改完后输出:
#   数据读取成功
#   获取到 5363 个A股股票代码
#   IPC二级分类数量: 127
#   成功注册因子: Tech_Momentum_1Y
#   开始获取收益率数据...
#   [1/3] 2022-12: 交易日不足
#   2023-01: 交易日不足
#   [3/3] 2023-02: 交易日不足
#   收益率数据获取完成,共 0 个月有效数据
#   没有获取到有效的收益率数据
09-23
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值