tf.squeeze(): What the Function Does

This article covers TensorFlow's tf.squeeze() function, which removes all size-1 dimensions from a tensor. By passing an argument, the caller can instead remove only specific size-1 dimensions.


tf.squeeze(input, squeeze_dims=None, name=None)

Removes dimensions of size 1 from the shape of a tensor.

Given a tensor input, this operation returns a tensor of the same type with all dimensions of size 1 removed. If you don’t want to remove all size 1 dimensions, you can remove specific size 1 dimensions by specifying squeeze_dims.

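A minimal runnable sketch (written with the TF 2.x spelling, where the deprecated squeeze_dims argument has been renamed axis; the shapes mirror the example in the official documentation):

import tensorflow as tf

# A tensor of shape [1, 2, 1, 3, 1, 1].
t = tf.zeros([1, 2, 1, 3, 1, 1])

# With no axis given, every size-1 dimension is removed.
print(tf.squeeze(t).shape)               # (2, 3)

# With specific axes, only those size-1 dimensions are removed.
print(tf.squeeze(t, axis=[2, 4]).shape)  # (1, 2, 3, 1)

Note that asking tf.squeeze to remove a dimension whose size is not 1 raises an error rather than silently reshaping the tensor.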
