PointNet train.py, statement 14: train_op = optimizer.minimize(loss, global_step=batch)

This post looks at how, in deep learning, an optimizer minimizes a loss function via gradient descent: how the train_op operation ties the optimizer to the loss to drive parameter updates, and how the batch variable is used to track the progress of optimization.
train_op = optimizer.minimize(loss, global_step=batch)

After statement 13 has created an optimizer, calling its minimize() method builds the training op that minimizes the loss function.

minimize() both computes the gradients and applies the parameter updates (it is shorthand for compute_gradients() followed by apply_gradients()). Because batch is passed as global_step, it is incremented by one each time the op runs, so batch records how many optimization steps have been performed.
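Below is a minimal, self-contained sketch of this behavior. The toy one-weight loss, the learning rate, and the choice of AdamOptimizer are illustrative assumptions, not taken from the PointNet source; only the train_op line mirrors the statement being discussed.

import tensorflow as tf  # TF 1.x API, as used by PointNet

# Toy stand-in for the network: one trainable weight and a scalar loss.
w = tf.Variable(0.0, name='w')
loss = tf.square(w - 3.0)

# Non-trainable counter; passing it as global_step makes minimize()
# increment it by one every time the returned op is run.
batch = tf.Variable(0, trainable=False, name='batch')

optimizer = tf.train.AdamOptimizer(learning_rate=0.1)  # illustrative choice
# Shorthand for compute_gradients() followed by apply_gradients():
# compute d(loss)/dw, update w, then increment batch.
train_op = optimizer.minimize(loss, global_step=batch)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        sess.run(train_op)
    print(sess.run(batch))  # prints 3: one increment per optimization step

A typical reason for tracking the step count this way is to feed it into a schedule such as tf.train.exponential_decay(), so that the learning rate can decay as training progresses.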

 
