PyTorch BUG:.grad=None? PyTorch 参数(nn.Parameter)的使用

部署运行你感兴趣的模型镜像

      在实验中,发现网络中某个新增的参数一直不更新,打印它的梯度得到 tensor.grad 为 None。查找资料并修改代码后,打印该参数的输出从

tensor([0.9925], device='cuda:0', grad_fn=<CopyBackwards>)
tensor([0.9925], device='cuda:0', grad_fn=<CopyBackwards>)

变为了:tensor([0.4926], device='cuda:0', requires_grad=True)

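原因:对需要求梯度的张量(这里是 nn.Parameter)做了一次 .cuda()/to(device)。当张量原本不在目标设备上时,这个操作会返回一个新的张量(带有 grad_fn=<CopyBackwards>),它不再是叶节点,也不再是注册在模块上的 Parameter,因此它的 .grad 为 None,优化器也不会更新它。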

修改方案

self.A = torch.nn.Parameter(torch.rand(10, 20),requires_grad=True).cuda()

改为

self.A = torch.nn.Parameter(torch.rand(10, 20,device = torch.device('cuda')),requires_grad=True)#.cuda()

    print(type(model.named_parameters()))
    for name, param in model.named_parameters():
        print(name, param.size())

并且

self.C = torch.nn.Parameter(torch.rand(1000,2000,device=torch.device('cuda')),requires_grad=False)* self.B

self.A = torch.nn.Parameter(torch.rand(1000,2000,device=torch.device('cuda')),requires_grad=False)
self.C = self.A* self.B

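以上两种写法的网络参数输出(model.named_parameters())也不同:后者会包含self.A和self.B,前者只有self.B。这是因为前者中 Parameter 与 self.B 相乘得到的是普通张量,赋值给 self.C 后不会被注册为参数;后者先把 Parameter 单独赋值给 self.A,所以 self.A 会被注册。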

参考与更多

https://www.youkuaiyun.com/tags/MtTagg4sMDYxMjktYmxvZwO0O0OO0O0O.html

您可能感兴趣的与本文相关的镜像

PyTorch 2.5

PyTorch 2.5

PyTorch
Cuda

PyTorch 是一个开源的 Python 机器学习库,基于 Torch 库,底层由 C++ 实现,应用于人工智能领域,如计算机视觉和自然语言处理

✅ 学习建议:如何从这个脚本进阶? ✅ 把 BoW 替换成 TF-IDF 或 One-Hot ✅ 添加 ReLU 激活函数 ✅ 改成多分类任务(3 类情绪) ✅ 引入 sklearn 做对比实验 ✅ 用 PyTorch 重写这个流程 ✅ 最终迁移到 Hugging Face + Transformers # ======================================== # 🧠 纯 Python 实现的 AI 模型全流程 # 包括:数据处理 → 模型定义 → 训练 → 保存 → 推理 # 无需 PyTorch/TensorFlow,仅使用 numpy 和标准库 # ======================================== import numpy as np import pickle import re from collections import Counter import random # ================================ # 1. 模拟数据集(正面/负面评论) # ================================ def create_dataset(): positive_texts = [ "I love this movie it's amazing", "great acting and wonderful story", "best film ever watched", "highly recommended excellent quality", "beautiful cinematography and music" ] negative_texts = [ "this movie is terrible and boring", "awful acting waste of time", "worst film I have ever seen", "boring plot and bad editing", "not recommended at all" ] texts = positive_texts + negative_texts labels = [1] * len(positive_texts) + [0] * len(negative_texts) return texts, labels texts, labels = create_dataset() print("✅ 数据集创建完成") for t, l in zip(texts, labels): print(f" '{t}' -> {l}") # ================================ # 2. 文本预处理与词汇表构建 # ================================ def clean_text(text): text = re.sub(r'[^a-zA-Z\s]', '', text.lower()) return text.strip() def build_vocab(texts, min_freq=1): words = [] for text in texts: cleaned = clean_text(text) words.extend(cleaned.split()) freq = Counter(words) vocab = {'<PAD>': 0, '<UNK>': 1} for word, count in freq.items(): if count >= min_freq: vocab[word] = len(vocab) return vocab vocab = build_vocab(texts) print(f"\n✅ 构建词汇表完成,大小: {len(vocab)}") print("示例:", dict(list(vocab.items())[:10])) # ================================ # 3. 
文本向量化(Bag-of-Words) # ================================ def text_to_bow(text, vocab): vec = np.zeros(len(vocab)) cleaned = clean_text(text) for word in cleaned.split(): idx = vocab.get(word, 1) # <UNK> vec[idx] += 1 return vec X = np.array([text_to_bow(t, vocab) for t in texts]) y = np.array(labels).reshape(-1, 1) print(f"\n✅ 向量化完成 X.shape={X.shape}, y.shape={y.shape}") print("示例向量:", X[0][:10]) # ================================ # 4. 定义神经网络(MLP) # ================================ class SimpleNN: def __init__(self, input_size, hidden_size=8, output_size=1): self.W1 = np.random.randn(input_size, hidden_size) * 0.5 self.b1 = np.zeros((1, hidden_size)) self.W2 = np.random.randn(hidden_size, output_size) * 0.5 self.b2 = np.zeros((1, output_size)) self.lr = 0.01 def sigmoid(self, z): # 防止溢出 z = np.clip(z, -500, 500) return 1 / (1 + np.exp(-z)) def sigmoid_derivative(self, a): return a * (1 - a) def forward(self, X): self.z1 = np.dot(X, self.W1) + self.b1 self.a1 = self.sigmoid(self.z1) self.z2 = np.dot(self.a1, self.W2) + self.b2 self.a2 = self.sigmoid(self.z2) return self.a2 def backward(self, X, y_true): m = X.shape[0] # 输出层误差 dz2 = (self.a2 - y_true) * self.sigmoid_derivative(self.a2) dW2 = np.dot(self.a1.T, dz2) / m db2 = np.sum(dz2, axis=0, keepdims=True) / m # 隐藏层误差 da1 = np.dot(dz2, self.W2.T) dz1 = da1 * self.sigmoid_derivative(self.a1) dW1 = np.dot(X.T, dz1) / m db1 = np.sum(dz1, axis=0, keepdims=True) / m # 更新参数 self.W2 -= self.lr * dW2 self.b2 -= self.lr * db2 self.W1 -= self.lr * dW1 self.b1 -= self.lr * db1 def train_step(self, X, y): pred = self.forward(X) loss = -np.mean(y * np.log(pred + 1e-8) + (1 - y) * np.log(1 - pred + 1e-8)) self.backward(X, y) return loss def predict(self, X): prob = self.forward(X) return (prob > 0.5).astype(int), prob # 初始化模型 model = SimpleNN(input_size=X.shape[1], hidden_size=6) print(f"\n✅ 模型初始化完成 W1:{model.W1.shape} W2:{model.W2.shape}") # ================================ # 5. 
训练模型 # ================================ epochs = 1000 for epoch in range(epochs): loss = model.train_step(X, y) if epoch % 200 == 0: print(f"Epoch {epoch}, Loss: {loss:.4f}") print("\n✅ 训练完成") # ================================ # 6. 评估与推理测试 # ================================ pred_label, pred_prob = model.predict(X) print("\n📊 训练集预测结果:") for i, text in enumerate(texts): real = y[i][0] pred = pred_label[i][0] prob = pred_prob[i][0] status = "✅" if real == pred else "❌" print(f"{status} '{text}' -> 真实:{real} 预测:{pred} (置信度:{prob:.2f})") # ================================ # 7. 保存模型和词汇表 # ================================ def save_model(model, vocab, filepath): data = { 'W1': model.W1, 'b1': model.b1, 'W2': model.W2, 'b2': model.b2, 'vocab': vocab } with open(filepath, 'wb') as f: pickle.dump(data, f) print(f"\n💾 模型已保存至 {filepath}") save_model(model, vocab, "simple_nlp_model.pkl") # ================================ # 8. 加载模型并进行新推理 # ================================ def load_and_predict(text, model_path, vocab): with open(model_path, 'rb') as f: data = pickle.load(f) # 重建模型结构 class InferenceModel: def __init__(self, data): self.W1 = data['W1'] self.b1 = data['b1'] self.W2 = data['W2'] self.b2 = data['b2'] def sigmoid(self, z): z = np.clip(z, -500, 500) return 1 / (1 + np.exp(-z)) def forward(self, x): z1 = np.dot(x, self.W1) + self.b1 a1 = self.sigmoid(z1) z2 = np.dot(a1, self.W2) + self.b2 a2 = self.sigmoid(z2) return a2 loaded_model = InferenceModel(data) vector = text_to_bow(text, vocab).reshape(1, -1) prob = loaded_model.forward(vector)[0][0] pred = 1 if prob > 0.5 else 0 print(f"\n🔍 新句子推理: '{text}'") print(f" 预测类别: {pred} (正面情绪)" if pred == 1 else f" 预测类别: {pred} (负面情绪)") print(f" 置信度: {prob:.2f}") return pred, prob # 测试新句子 load_and_predict("I really enjoyed this fantastic movie", "simple_nlp_model.pkl", vocab) load_and_predict("this is a terrible and boring film", "simple_nlp_model.pkl", vocab) 把进阶的完整代码给我,并标注修改的部分以及原因
最新发布
11-26
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值