Maximum Profit in a Stock Market

本文介绍了一种寻找股票买卖最佳时机以获得最大利润的算法。通过遍历每日股价记录并跟踪最低购买价格及最大利润,该算法能快速找出最优买卖日期。

Source:http://programming4interviews.wordpress.com/category/dynamic-programming/


Given a week's daily prices of a stock, find the days on which we should buy and sell the stock so that the profit is maximized.

This is a common interview question, and it is also very special to me because it was asked in an interview I attended some time back. I could not crack it fully during the interview. I took it up multiple times afterwards, but always left it halfway. Today I am happy that I could finally crack this problem and also write working code for it. Interestingly, the answer was always in front of me here. It's just that I did not pay much attention to it.

Go through the elements of the array once, keeping track of the minimum stock price seen so far and the maximum profit achievable by selling at the current price; when the loop ends, the maximum profit is your answer.

/**
 * Returns the maximum profit obtainable from a single buy followed by a
 * single sell on a later (or the same) day.
 *
 * The array is scanned once while tracking the lowest price seen so far;
 * selling at the current price against that minimum is the best trade that
 * ends on the current day.
 *
 * @param stockPriceSamples daily prices in chronological order
 * @return the best achievable profit, or 0 when the input is null, empty,
 *         has a single sample, or prices never rise
 */
public static int findMaxProfit(int[] stockPriceSamples) {
	// Guard: the original indexed element 0 unconditionally and crashed on
	// an empty (or null) array.
	if (stockPriceSamples == null || stockPriceSamples.length == 0) {
		return 0;
	}
	int maxProfit = 0;
	int minTillNow = stockPriceSamples[0];
	// Day 0 can only be a buy day, so start comparing from day 1.
	for (int i = 1; i < stockPriceSamples.length; i++) {
		int price = stockPriceSamples[i];
		maxProfit = Math.max(maxProfit, price - minTillNow);
		minTillNow = Math.min(minTillNow, price);
	}
	return maxProfit;
}

The driver code

/**
 * Driver: prints the best single-trade profit for a fixed sample of prices.
 */
public static void main(String[] args) {
	int[] samplePrices = { 5, 2, 10, 3, 10, 5, 5, 1, 17, 4, 100 };
	int bestProfit = findMaxProfit(samplePrices);
	System.out.println(bestProfit);
}

The answer is

99

Again, this is not something newly invented. I could find at least 3-4 links where this problem is explained very well. I personally like to let the code talk for me :-) . For those who want to read more and see some alternate solutions, I found this link to be very useful.



# Reported error: KeyError: 'monthly_return'
# Code (re-indented). Root cause of the error: the rebalance calendar was
# built with pd.date_range(freq='M'), i.e. month-END dates, which never equal
# the TradingMonth values stored in the data, so every rebalance was skipped
# and the empty result frame had no 'monthly_return' column.
import os
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the backtest only needs numpy/pandas
    plt = None

try:
    import seaborn as sns  # imported by the original script; unused below
except ImportError:
    sns = None

warnings.filterwarnings('ignore')

# Configure fonts able to render the Chinese chart labels.
if plt is not None:
    plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
    plt.rcParams['axes.unicode_minus'] = False

DEFAULT_DATA_PATH = '/Users/diligence/PycharmProjects/mt_fx/merge_data/datafile/m_data/n_mdata1.csv'


def _store_if_finite(factors: dict, name: str, value) -> None:
    """Store ``value`` under ``name`` only when it is a finite number."""
    try:
        finite = bool(np.isfinite(value))
    except TypeError:
        finite = False
    if finite:
        factors[name] = float(value)


class MultiFactorArbitrageStrategy:
    """Multi-factor long/short strategy backtested on monthly data.

    Pipeline: load data -> compute factors per (month, stock) -> at every
    month present in the data pick the factors most correlated with the next
    period's return -> go long the top-scored and short the bottom-scored
    stocks -> record the realised return of the previously held portfolio.
    """

    KEY_COLUMNS = ['TradingMonth', 'Stkcd']

    # Factor name -> candidate source column names (first match wins).
    FINANCIAL_INDICATORS = {
        'roe': ['ROE', 'roe', '净资产收益率'],
        'roa': ['ROA', 'roa', '总资产收益率'],
        'gross_profit_margin': ['GrossProfitMargin', 'gross_profit_margin', '毛利率'],
        'net_profit_margin': ['NetProfitMargin', 'net_profit_margin', '净利率'],
        'asset_turnover': ['AssetTurnover', 'asset_turnover', '总资产周转率'],
        'current_ratio': ['CurrentRatio', 'current_ratio', '流动比率'],
        'debt_to_equity': ['DebtToEquity', 'debt_to_equity', '资产负债率'],
        'revenue_growth': ['RevenueGrowth', 'revenue_growth', '营业收入增长率'],
        'profit_growth': ['ProfitGrowth', 'profit_growth', '净利润增长率'],
        'pe_ratio': ['PERatio', 'pe_ratio', '市盈率'],
        'pb_ratio': ['PBRatio', 'pb_ratio', '市净率'],
        'ps_ratio': ['PSRatio', 'ps_ratio', '市销率'],
    }

    # Factors for which a lower value scores higher.
    LOWER_IS_BETTER = frozenset({'pe_ratio', 'pb_ratio', 'ps_ratio', 'debt_to_equity'})

    # Maximum number of factors kept by dynamic_factor_selection.
    MAX_SELECTED_FACTORS = 20

    # Columns of ``strategy_results``; fixed so an empty backtest still
    # yields a frame that downstream code can index.
    RESULT_COLUMNS = [
        'date', 'monthly_return', 'turnover', 'long_count', 'short_count',
        'selected_factors', 'long_stocks', 'short_stocks',
    ]

    def __init__(self, data_path: str, start_date: str = '2014-01-01', end_date: str = '2024-12-31'):
        """
        Args:
            data_path: path of the CSV data file
            start_date: first date of the backtest
            end_date: last date of the backtest
        """
        self.data_path = data_path
        self.start_date = pd.to_datetime(start_date)
        self.end_date = pd.to_datetime(end_date)
        self.data = None
        self.factor_data = None
        self.strategy_results = None

        # Strategy parameters
        self.window_size = 6  # rolling window, in months
        self.top_n = 20  # number of long positions
        self.short_n = 20  # number of short positions
        # Kept for compatibility. Rebalancing now follows the months that
        # actually occur in the data instead of a generated calendar.
        self.rebalance_freq = 'M'

    def load_and_preprocess_data(self):
        """Read the CSV, restrict it to the backtest range, and de-duplicate it."""
        print("正在加载数据...")

        self.data = pd.read_csv(self.data_path, low_memory=False)

        # Drop repeated column names, keeping the first occurrence.
        if self.data.columns.duplicated().any():
            self.data = self.data.loc[:, ~self.data.columns.duplicated()]

        self.data['TradingMonth'] = pd.to_datetime(self.data['TradingMonth'])

        in_range = (
            (self.data['TradingMonth'] >= self.start_date)
            & (self.data['TradingMonth'] <= self.end_date)
        )
        self.data = (
            self.data[in_range]
            .sort_values(self.KEY_COLUMNS)
            .drop_duplicates(subset=self.KEY_COLUMNS, keep='first')
            .reset_index(drop=True)  # reset after dropping rows so the index stays contiguous
        )

        print(f"数据加载完成,共 {len(self.data)} 条记录")
        print(f"时间范围: {self.data['TradingMonth'].min()} 到 {self.data['TradingMonth'].max()}")
        print(f"股票数量: {self.data['Stkcd'].nunique()}")

    def calculate_financial_factors(self, group: pd.DataFrame) -> dict:
        """Return the mean of each available financial indicator in ``group``.

        Non-numeric cells are coerced to NaN. Indicators whose mean is
        missing or exactly zero are omitted.
        """
        factors = {}
        for factor_name, candidates in self.FINANCIAL_INDICATORS.items():
            column = next((c for c in candidates if c in group.columns), None)
            if column is None:
                continue
            value = pd.to_numeric(group[column], errors='coerce').mean()
            if value != 0:
                _store_if_finite(factors, factor_name, value)
        return factors

    def calculate_technical_factors(self, group: pd.DataFrame) -> dict:
        """Return price-based factors computed from the rows of ``group``.

        Only finite values are returned; windows longer than the group
        produce no entry.

        NOTE(review): load_and_preprocess_data keeps one row per
        (TradingMonth, Stkcd), so when called from calculate_all_factors the
        rolling statistics have a single observation and are mostly
        undefined. Confirm whether these factors were meant to be computed
        over each stock's history instead.
        """
        factors = {}
        if group.empty or 'Clsprc' not in group.columns:
            return factors

        close = pd.to_numeric(group['Clsprc'], errors='coerce')
        high = pd.to_numeric(group['Hiprc'], errors='coerce') if 'Hiprc' in group.columns else close
        low = pd.to_numeric(group['Loprc'], errors='coerce') if 'Loprc' in group.columns else close

        # Moving averages
        for window in (5, 10, 20, 60):
            _store_if_finite(factors, f'ma{window}', close.rolling(window=window).mean().iloc[-1])

        # MACD
        fast = close.ewm(span=12, adjust=False).mean()
        slow = close.ewm(span=26, adjust=False).mean()
        macd = fast - slow
        signal = macd.ewm(span=9, adjust=False).mean()
        _store_if_finite(factors, 'macd', macd.iloc[-1])
        _store_if_finite(factors, 'macd_signal', signal.iloc[-1])

        # RSI. 100 * gain / (gain + loss) equals 100 - 100 / (1 + gain / loss)
        # without dividing by a zero average loss.
        delta = close.diff()
        avg_gain = delta.where(delta > 0, 0).rolling(window=14).mean().iloc[-1]
        avg_loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean().iloc[-1]
        if np.isfinite(avg_gain) and np.isfinite(avg_loss) and avg_gain + avg_loss > 0:
            factors['rsi'] = float(100 * avg_gain / (avg_gain + avg_loss))

        # Bollinger bands
        bb_middle = close.rolling(window=20).mean().iloc[-1]
        bb_std = close.rolling(window=20).std().iloc[-1]
        _store_if_finite(factors, 'bb_middle', bb_middle)
        if np.isfinite(bb_middle) and np.isfinite(bb_std):
            factors['bb_upper'] = float(bb_middle + 2 * bb_std)
            factors['bb_lower'] = float(bb_middle - 2 * bb_std)

        # Return statistics. A zero previous close gives +/-inf, which is dropped.
        returns = (close / close.shift(1) - 1).replace([np.inf, -np.inf], np.nan)
        _store_if_finite(factors, 'return_mean', returns.mean())
        _store_if_finite(factors, 'return_std', returns.std())
        _store_if_finite(factors, 'return_skew', returns.skew())
        _store_if_finite(factors, 'return_kurt', returns.kurtosis())

        # Position of the last close inside the observed high/low range.
        price_range = high.max() - low.min()
        if np.isfinite(price_range) and price_range > 0:
            _store_if_finite(factors, 'price_position', (close.iloc[-1] - low.min()) / price_range)

        return factors

    def calculate_market_factors(self, group: pd.DataFrame) -> dict:
        """Return size, volume and volatility factors for ``group``."""
        factors = {}
        if group.empty:
            return factors

        # Market value
        if 'Dsmvtll' in group.columns:
            market_value = pd.to_numeric(group['Dsmvtll'], errors='coerce').mean()
            _store_if_finite(factors, 'market_value', market_value)
            if np.isfinite(market_value) and market_value > 0:  # log is undefined otherwise
                factors['log_market_value'] = float(np.log(market_value))

        # Trading volume
        if 'Dnvaltrd' in group.columns:
            volume = pd.to_numeric(group['Dnvaltrd'], errors='coerce')
            mean_volume = volume.mean()
            _store_if_finite(factors, 'volume_mean', mean_volume)
            _store_if_finite(factors, 'volume_std', volume.std())
            ratio = volume.iloc[-1] / mean_volume if mean_volume > 0 else 1
            _store_if_finite(factors, 'volume_ratio', ratio)

        # Volatility
        if 'Clsprc' in group.columns:
            close = pd.to_numeric(group['Clsprc'], errors='coerce')
            returns = (close / close.shift(1) - 1).replace([np.inf, -np.inf], np.nan)
            # sqrt(252) annualises daily observations.
            # NOTE(review): the column is named TradingMonth; confirm the
            # data frequency this factor assumes.
            _store_if_finite(factors, 'volatility', returns.std() * np.sqrt(252))

        return factors

    def calculate_all_factors(self):
        """Build ``self.factor_data``: one row of standardised factors per (month, stock).

        Missing values are forward-filled within each stock only, so one
        stock's value never leaks into another and no future value is used.
        Factors are then z-scored within each month and any remaining gap is
        set to 0, the cross-sectional mean.
        """
        print("正在计算因子...")

        rows = []
        for (trading_month, stkcd), group in self.data.groupby(self.KEY_COLUMNS):
            row = {'TradingMonth': trading_month, 'Stkcd': stkcd}
            row.update(self.calculate_financial_factors(group))
            row.update(self.calculate_technical_factors(group))
            row.update(self.calculate_market_factors(group))
            rows.append(row)

        self.factor_data = pd.DataFrame(rows)
        if self.factor_data.empty:
            self.factor_data = pd.DataFrame(columns=self.KEY_COLUMNS)

        factor_cols = [col for col in self.factor_data.columns if col not in self.KEY_COLUMNS]
        if factor_cols:
            values = self.factor_data[factor_cols].astype(float).replace([np.inf, -np.inf], np.nan)
            # Rows are ordered by (TradingMonth, Stkcd), so within a stock
            # they are chronological and ffill only looks backwards in time.
            values = values.groupby(self.factor_data['Stkcd']).ffill()

            by_month = values.groupby(self.factor_data['TradingMonth'])
            spread = by_month.transform('std').replace(0, np.nan)
            zscores = (values - by_month.transform('mean')) / spread
            self.factor_data[factor_cols] = zscores.fillna(0.0)

        print(f"因子计算完成,共 {len(factor_cols)} 个因子")
        print(f"因子数据形状: {self.factor_data.shape}")

    def _forward_returns(self, start_date: pd.Timestamp, end_date: pd.Timestamp) -> pd.DataFrame:
        """Return each stock's return from a month to its next observed month.

        Uses ``Clsprc`` of the rows with ``start_date <= TradingMonth <=
        end_date``. The last observation of every stock has no successor and
        gets NaN.
        """
        months = self.data['TradingMonth']
        in_window = (months >= start_date) & (months <= end_date)
        prices = self.data.loc[in_window, self.KEY_COLUMNS + ['Clsprc']].copy()
        prices = prices.drop_duplicates(subset=self.KEY_COLUMNS).sort_values(['Stkcd', 'TradingMonth'])

        close = pd.to_numeric(prices['Clsprc'], errors='coerce')
        close = close.where(close > 0)  # non-positive prices cannot be return bases
        next_close = close.groupby(prices['Stkcd']).shift(-1)
        prices['future_return'] = next_close / close - 1
        return prices[self.KEY_COLUMNS + ['future_return']]

    def dynamic_factor_selection(self, date: pd.Timestamp, window_months: int = 6) -> list:
        """Pick the factors most correlated with the following period's return.

        Args:
            date: rebalance date; only factor rows strictly before it are used
            window_months: length of the look-back window in months

        Returns:
            Up to MAX_SELECTED_FACTORS factor names ordered by decreasing
            absolute correlation; empty when nothing can be estimated.
        """
        start_date = date - pd.DateOffset(months=window_months)
        months = self.factor_data['TradingMonth']
        historical = self.factor_data[(months >= start_date) & (months < date)]
        if historical.empty:
            return []

        factor_cols = [col for col in historical.columns if col not in self.KEY_COLUMNS]

        # The target must be a price return. The original shifted the
        # TradingMonth column and so correlated factors with a timestamp.
        # Prices up to and including ``date`` are known at the rebalance.
        forward = self._forward_returns(start_date, date)
        historical = historical.merge(forward, on=self.KEY_COLUMNS, how='left')
        target = historical['future_return']
        if target.notna().sum() < 2:
            return []

        factor_importance = {}
        for factor in factor_cols:
            values = historical[factor]
            if not pd.api.types.is_numeric_dtype(values):
                continue
            correlation = values.corr(target)
            if pd.notna(correlation):
                factor_importance[factor] = abs(correlation)

        ranked = sorted(factor_importance, key=factor_importance.get, reverse=True)
        return ranked[:self.MAX_SELECTED_FACTORS]

    def calculate_stock_scores(self, date: pd.Timestamp, selected_factors: list) -> pd.DataFrame:
        """Score every stock on ``date`` and return the rows sorted best first.

        The score is the sum of the selected factors, with the factors in
        LOWER_IS_BETTER subtracted. Returns an empty frame when there is no
        data for ``date`` or no factor was selected.
        """
        current = self.factor_data[self.factor_data['TradingMonth'] == date].copy()
        if current.empty or not selected_factors:
            return pd.DataFrame()

        score = pd.Series(0.0, index=current.index)
        for factor in selected_factors:
            if factor not in current.columns:
                continue
            sign = -1.0 if factor in self.LOWER_IS_BETTER else 1.0
            # A NaN factor would make the whole score NaN and push the stock
            # to the tail, i.e. into the short leg.
            score = score + sign * current[factor].fillna(0.0)

        current['score'] = score
        # Stable sort keeps ties in (month, stock) order, so runs are reproducible.
        return current.sort_values('score', ascending=False, kind='mergesort')

    def _close_on(self, date: pd.Timestamp) -> pd.Series:
        """Return closing prices on ``date`` indexed by stock code."""
        rows = self.data.loc[self.data['TradingMonth'] == date, ['Stkcd', 'Clsprc']]
        rows = rows.drop_duplicates(subset='Stkcd', keep='first')
        return pd.to_numeric(rows.set_index('Stkcd')['Clsprc'], errors='coerce')

    def calculate_returns(self, date: pd.Timestamp, positions: dict, end_date=None) -> float:
        """Return the portfolio return of ``positions`` held from ``date``.

        Args:
            date: date on which the positions are entered (at the close)
            positions: stock code -> signed weight
            end_date: date on which the positions are closed; defaults to
                the next month present in the data after ``date``

        Returns:
            Sum of weight * stock return. Stocks without a usable price on
            either date contribute nothing. The weights already carry the
            1/N scaling, so the sum is not divided by the position count
            again as the original did.
        """
        if not positions:
            return 0.0

        if end_date is None:
            months = self.data['TradingMonth']
            later = months[months > date]
            if later.empty:
                return 0.0
            end_date = later.min()

        weights = pd.Series(positions, dtype=float)
        entry = self._close_on(date).reindex(weights.index)
        exit_ = self._close_on(end_date).reindex(weights.index)

        tradable = entry.notna() & exit_.notna() & (entry > 0)
        if not tradable.any():
            return 0.0
        stock_returns = exit_[tradable] / entry[tradable] - 1
        return float((weights[tradable] * stock_returns).sum())

    def _rebalance_dates(self) -> pd.DatetimeIndex:
        """Return the months present in the factor data after the warm-up window."""
        if self.factor_data is None or 'TradingMonth' not in self.factor_data.columns:
            return pd.DatetimeIndex([])
        months = pd.DatetimeIndex(self.factor_data['TradingMonth'].dropna().unique()).sort_values()
        if len(months) == 0:
            return months
        first_rebalance = months[0] + pd.DateOffset(months=self.window_size)
        return months[months >= first_rebalance]

    def run_backtest(self):
        """Run the backtest and store one row per rebalance in ``self.strategy_results``.

        Each row holds the new portfolio formed on ``date`` and, in
        ``monthly_return``, the return realised since the previous rebalance
        by the portfolio that was held over that period. The first row has
        nothing held yet, so its return is 0.
        """
        print("开始回测...")

        backtest_results = []
        current_positions = {}
        previous_date = None

        # Use the dates that exist in the data. A generated month-end
        # calendar never matched TradingMonth, so every rebalance was
        # skipped and the result frame had no 'monthly_return' column.
        for date in self._rebalance_dates():
            selected_factors = self.dynamic_factor_selection(date, self.window_size)
            if not selected_factors:
                continue

            scored_stocks = self.calculate_stock_scores(date, selected_factors)
            if scored_stocks.empty:
                continue

            long_stocks = scored_stocks.head(self.top_n)['Stkcd'].tolist()
            long_set = set(long_stocks)
            # With a small universe head and tail overlap; a stock must not
            # be long and short at once.
            short_stocks = [
                stock for stock in scored_stocks.tail(self.short_n)['Stkcd'].tolist()
                if stock not in long_set
            ]

            # Equal weights: +1/top_n per long, -1/short_n per short.
            new_positions = {stock: 1.0 / self.top_n for stock in long_stocks}
            new_positions.update({stock: -1.0 / self.short_n for stock in short_stocks})

            # Turnover: half the sum of absolute weight changes.
            turnover = 0
            if current_positions:
                all_stocks = set(current_positions) | set(new_positions)
                turnover = sum(
                    abs(new_positions.get(stock, 0) - current_positions.get(stock, 0))
                    for stock in all_stocks
                ) / 2

            # Return of the portfolio held from the previous rebalance until now.
            if current_positions:
                monthly_return = self.calculate_returns(previous_date, current_positions, end_date=date)
            else:
                monthly_return = 0.0

            backtest_results.append({
                'date': date,
                'monthly_return': monthly_return,
                'turnover': turnover,
                'long_count': len(long_stocks),
                'short_count': len(short_stocks),
                'selected_factors': selected_factors[:5],  # first 5 factors
                'long_stocks': long_stocks[:10],  # first 10 long stocks
                'short_stocks': short_stocks[:10],  # first 10 short stocks
            })

            current_positions = new_positions
            previous_date = date

        # Explicit columns: an empty backtest must still expose 'monthly_return'.
        self.strategy_results = pd.DataFrame(backtest_results, columns=self.RESULT_COLUMNS)
        self.strategy_results['cumulative_return'] = (
            1 + self.strategy_results['monthly_return'].astype(float)
        ).cumprod()

        if self.strategy_results.empty:
            print("回测没有产生任何调仓记录,请检查数据的时间范围、Clsprc 列和因子数据")
        print(f"回测完成,共 {len(self.strategy_results)} 个调仓周期")

    def calculate_performance_metrics(self) -> dict:
        """Return summary statistics of the backtest, or {} without results."""
        if self.strategy_results is None or len(self.strategy_results) == 0:
            return {}

        returns = self.strategy_results['monthly_return'].to_numpy(dtype=float)
        cumulative_returns = self.strategy_results['cumulative_return'].to_numpy(dtype=float)

        # Annualised return. A wiped-out portfolio (growth <= 0) has no real
        # fractional power, so it is reported as -100%.
        total_return = cumulative_returns[-1] - 1
        years = len(returns) / 12
        growth = 1 + total_return
        annual_return = growth ** (1 / years) - 1 if growth > 0 else -1.0

        # Annualised volatility
        annual_volatility = np.std(returns) * np.sqrt(12)

        # Sharpe ratio, assuming a 3% risk-free rate
        risk_free_rate = 0.03
        sharpe_ratio = (annual_return - risk_free_rate) / annual_volatility if annual_volatility > 0 else 0

        # Maximum drawdown
        peak = np.maximum.accumulate(cumulative_returns)
        drawdown = (cumulative_returns - peak) / peak
        max_drawdown = np.min(drawdown)

        # Win rate
        win_rate = np.sum(returns > 0) / len(returns)

        # Average turnover
        avg_turnover = self.strategy_results['turnover'].mean()

        # Calmar ratio
        calmar_ratio = annual_return / abs(max_drawdown) if max_drawdown != 0 else 0

        return {
            'annual_return': annual_return,
            'annual_volatility': annual_volatility,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'win_rate': win_rate,
            'avg_turnover': avg_turnover,
            'calmar_ratio': calmar_ratio,
            'total_return': total_return,
            'total_months': len(returns),
        }

    def plot_performance(self):
        """Plot cumulative return, return distribution, drawdown and turnover."""
        if self.strategy_results is None or self.strategy_results.empty:
            print("没有回测结果可供绘图")
            return
        if plt is None:
            print("未安装 matplotlib,跳过绘图")
            return

        results = self.strategy_results
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('多因子多空套利策略回测结果', fontsize=16, fontweight='bold')

        # 1. Cumulative return
        ax = axes[0, 0]
        ax.plot(results['date'], results['cumulative_return'],
                linewidth=2, color='blue', label='策略累计收益')
        ax.axhline(y=1, color='red', linestyle='--', alpha=0.5, label='基准线')
        ax.set_title('累计收益率')
        ax.set_xlabel('日期')
        ax.set_ylabel('累计收益率')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # 2. Distribution of period returns
        ax = axes[0, 1]
        ax.hist(results['monthly_return'], bins=30, alpha=0.7, color='green', edgecolor='black')
        ax.axvline(x=0, color='red', linestyle='--', alpha=0.7, label='零收益线')
        ax.set_title('月度收益率分布')
        ax.set_xlabel('月度收益率')
        ax.set_ylabel('频次')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # 3. Drawdown curve
        cumulative_returns = results['cumulative_return'].values
        peak = np.maximum.accumulate(cumulative_returns)
        drawdown = (cumulative_returns - peak) / peak
        ax = axes[1, 0]
        ax.fill_between(results['date'], drawdown, 0, alpha=0.3, color='red', label='回撤')
        ax.plot(results['date'], drawdown, color='red', linewidth=1)
        ax.set_title('回撤曲线')
        ax.set_xlabel('日期')
        ax.set_ylabel('回撤率')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # 4. Turnover
        ax = axes[1, 1]
        ax.plot(results['date'], results['turnover'], linewidth=1, color='orange', label='换手率')
        ax.axhline(y=results['turnover'].mean(), color='red', linestyle='--', alpha=0.7,
                   label=f'平均换手率: {results["turnover"].mean():.2%}')
        ax.set_title('换手率变化')
        ax.set_xlabel('日期')
        ax.set_ylabel('换手率')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('策略绩效图.png', dpi=300, bbox_inches='tight')
        plt.show()

        # Holdings chart
        self.plot_holdings()

    def plot_holdings(self):
        """Plot how often each stock appeared in the recorded long and short lists."""
        if self.strategy_results is None or self.strategy_results.empty:
            return
        if plt is None:
            print("未安装 matplotlib,跳过绘图")
            return

        # Flatten the per-rebalance stock lists.
        all_long_stocks = [s for stocks in self.strategy_results['long_stocks'] for s in stocks]
        all_short_stocks = [s for stocks in self.strategy_results['short_stocks'] for s in stocks]

        long_freq = pd.Series(all_long_stocks).value_counts().head(20)
        short_freq = pd.Series(all_short_stocks).value_counts().head(20)

        fig, axes = plt.subplots(1, 2, figsize=(16, 8))
        fig.suptitle('策略持仓股票频次统计', fontsize=16, fontweight='bold')

        panels = (
            (axes[0], long_freq, 'green', '做多股票频次 (Top 20)'),
            (axes[1], short_freq, 'red', '做空股票频次 (Top 20)'),
        )
        for ax, freq, color, title in panels:
            ax.barh(range(len(freq)), freq.values, color=color, alpha=0.7)
            ax.set_yticks(range(len(freq)))
            ax.set_yticklabels(freq.index)
            ax.set_title(title)
            ax.set_xlabel('出现频次')
            ax.invert_yaxis()

        plt.tight_layout()
        plt.savefig('持仓情况图.png', dpi=300, bbox_inches='tight')
        plt.show()

    def print_performance_summary(self):
        """Print the performance metrics and the most recent portfolio."""
        metrics = self.calculate_performance_metrics()

        if not metrics:
            print("无法计算绩效指标")
            return

        print("\n" + "="*60)
        print("多因子多空套利策略绩效摘要")
        print("="*60)
        print(f"回测期间: {self.start_date.strftime('%Y-%m-%d')} 至 {self.end_date.strftime('%Y-%m-%d')}")
        print(f"总调仓次数: {metrics['total_months']}")
        print(f"总收益率: {metrics['total_return']:.2%}")
        print(f"年化收益率: {metrics['annual_return']:.2%}")
        print(f"年化波动率: {metrics['annual_volatility']:.2%}")
        print(f"夏普比率: {metrics['sharpe_ratio']:.3f}")
        print(f"最大回撤: {metrics['max_drawdown']:.2%}")
        print(f"胜率: {metrics['win_rate']:.2%}")
        print(f"平均换手率: {metrics['avg_turnover']:.2%}")
        print(f"卡玛比率: {metrics['calmar_ratio']:.3f}")
        print("="*60)

        # Most recent portfolio
        if len(self.strategy_results) > 0:
            latest_result = self.strategy_results.iloc[-1]
            print(f"\n最新调仓日期: {latest_result['date'].strftime('%Y-%m-%d')}")
            print(f"做多股票数量: {latest_result['long_count']}")
            print(f"做空股票数量: {latest_result['short_count']}")
            print(f"主要因子: {latest_result['selected_factors']}")
            print(f"做多股票: {latest_result['long_stocks'][:5]}...")
            print(f"做空股票: {latest_result['short_stocks'][:5]}...")

    def run_strategy(self):
        """Run the whole pipeline: load, factors, backtest, summary, charts."""
        print("开始运行多因子多空套利策略...")

        # 1. Load data
        self.load_and_preprocess_data()

        # 2. Compute factors
        self.calculate_all_factors()

        # 3. Run the backtest
        self.run_backtest()

        # 4. Print the metrics (computed inside the summary)
        self.print_performance_summary()

        # 5. Draw the charts
        self.plot_performance()

        print("策略运行完成!")


def main(data_path: str = DEFAULT_DATA_PATH):
    """Create the strategy for ``data_path`` and run it end to end."""
    strategy = MultiFactorArbitrageStrategy(
        data_path=data_path,
        start_date='2014-01-01',
        end_date='2024-12-31'
    )

    strategy.run_strategy()


if __name__ == "__main__":
    main()
07-31
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值