Couchbase Usage Notes

While testing Couchbase, the Python client raised a TypeError. Inspecting the error message showed that the code was indexing a plain string with a key (string indices must be integers), i.e. a value expected to be a parsed JSON object was actually a string. This post explains the cause of the error in detail and provides a solution.


1. Error reported while testing Couchbase:

[push@pushtest bin]$ ./cbworkloadgen -n localhost:8091

Traceback (most recent call last):
  File "/opt/couchbase/lib/python/cbworkloadgen", line 79, in _run
    options.verbose)
  File "/opt/couchbase/lib/python/couchbase/couchbaseclient.py", line 684, in __init__
    self.rest = RestConnection(server)
  File "/opt/couchbase/lib/python/couchbase/rest_client.py", line 185, in __init__
    self.couch_api_base = config["nodes"][0].get("couchApiBase")
TypeError: string indices must be integers
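
The traceback points at rest_client.py line 185: config["nodes"][0] fails because config is a plain string at that point rather than the parsed JSON pool configuration, so indexing it with "nodes" raises "string indices must be integers". A minimal sketch of the failure mode follows; the dict layout and the "unknown pool" string are illustrative, not copied from an actual server response:

# When the REST call to /pools/default succeeds, config is the parsed JSON document
config = {"nodes": [{"couchApiBase": "http://localhost:8092/default"}]}
print(config["nodes"][0].get("couchApiBase"))  # OK: prints the couchApiBase URL

# If the server instead answers with an error body, the client ends up holding a string
config = "unknown pool"
try:
    config["nodes"][0]
except TypeError as e:
    print(e)  # string indices must be integers

In practice this usually means the node at localhost:8091 did not return a valid pool configuration, for example because the cluster has not been initialized yet or the request was not authorized. A quick check is to open http://localhost:8091/pools/default and confirm it returns JSON containing a "nodes" array. When running cbworkloadgen it also helps to pass the bucket and credentials explicitly (flag names per the tool's --help; adjust to your deployment):

./cbworkloadgen -n localhost:8091 -u Administrator -p <password> -b default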