torchvision.utils.save_image保存图片全黑问题

本文介绍了解决VAE模型中使用torchvision.utils.save_image保存图片出现全黑问题的过程,包括排查模型结构、调整学习率等步骤,并分享了最终解决办法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

torchvision.utils.save_image保存图片全黑

问题描述:跑VAE模型的时候,遇到利用torchvision.utils.save_image保存图片,结果保存的图片是全黑的,而且图片是由灰色慢慢变黑的
在这里插入图片描述
原始图片像素值归一化后的数据如下:
在这里插入图片描述
重构出来的图片的部分数据如下(很正常啊,归一化以后的数据)
在这里插入图片描述
尝试
1、刚开始以为是模型结构的问题,但是看了好长时间都没发现有什么问题。
2、save_image函数的问题,于是改用opencv的imwrite函数来保存图片,结果还是全黑。
3、归一化问题:由于重构图片的数据值在0-1之间,所以保存的时候可能都按照0保存,于是把每个像素值都乘以255,结果发现保存的图片还是全黑。
4、之后通过调试代码,发现每轮迭代的损失值变化很小很小,有的甚至没有变化,于是想到是不是参数没有回传,联想到了优化器的学习率问题。(学习率过低会导致学习速度太慢,学习率过高又容易导致难以收敛),刚开始学习率是1e-3=0.001,修改为0.0005,发现生成的图片不是全黑啦!
在这里插入图片描述
虽然全黑的问题解决了,但是重构图片的质量并不好,所以还是需要再修改的!

``` class SimpleCNN(nn.Module): def __init__(self, num_classes): super().__init__() # 原始特征提取层 self.features = nn.Sequential( nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2), nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2), nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2) ) # 原始分类器 self.classifier = nn.Sequential( nn.Flatten(), nn.Linear(128*32*32, 512), nn.ReLU(), nn.Linear(512, num_classes) ) # 新增反卷积网络(解码器) self.decoder = nn.Sequential( nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), nn.ReLU(), nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1), nn.ReLU(), nn.ConvTranspose2d(32, 3, 3, stride=2, padding=1, output_padding=1), nn.Sigmoid() # 输出像素值在[0,1]之间 ) def forward(self, x): x = self.features(x) return self.classifier(x) def visualize_features(self, x): # 前向传播获取特征图 features = self.features(x) # 通过反卷积重建图像 return self.decoder(features) for epoch in range(num_epochs): # 训练阶段 model.train() train_loss = 0.0 for images, labels in train_loader: images, labels = images.to(device), labels.to(device) optimizer.zero_grad() outputs = model(images) loss = criterion(outputs, labels) loss.backward() optimizer.step() train_loss += loss.item() * images.size(0) # 验证阶段 model.eval() val_loss = 0.0 correct = 0 total = 0 with torch.no_grad(): for images, labels in val_loader: images, labels = images.to(device), labels.to(device) outputs = model(images) loss = criterion(outputs, labels) val_loss += loss.item() * images.size(0) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() # 计算指标 train_loss = train_loss / len(train_dataset) val_loss = val_loss / len(val_dataset) val_acc = 100 * correct / total # 保存最佳模型 if val_acc > best_val_acc: best_val_acc = val_acc torch.save(model.state_dict(), 'best_model.pth') print(f'Epoch [{epoch+1}/{num_epochs}] | ' f'Train Loss: {train_loss:.4f} | ' f'Val Loss: {val_loss:.4f} | ' f'Val Acc: {val_acc:.2f}%') # 
在训练循环内(epoch循环结束后)添加: if (epoch+1) % 5 == 0: # 每5个epoch可视化一次 model.eval() with torch.no_grad(): # 获取验证集样本 sample_data, _ = next(iter(val_loader)) sample_data = sample_data.to(device) # 原始图像 save_image(sample_data, f'epoch_{epoch+1}_original.png') # 重建图像 reconstructed = model.visualize_features(sample_data) save_image(reconstructed, f'epoch_{epoch+1}_reconstructed.png')```原始图像和重建图像保存到哪里了,如何查看
03-27
``` class SimpleCNN(nn.Module): def __init__(self, num_classes): super().__init__() # 原始特征提取层 self.features = nn.Sequential( nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2), nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2), nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2, 2) ) # 原始分类器 self.classifier = nn.Sequential( nn.Flatten(), nn.Linear(128*32*32, 512), nn.ReLU(), nn.Linear(512, num_classes) ) # 新增反卷积网络(解码器) self.decoder = nn.Sequential( nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), nn.ReLU(), nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1), nn.ReLU(), nn.ConvTranspose2d(32, 3, 3, stride=2, padding=1, output_padding=1), nn.Sigmoid() # 输出像素值在[0,1]之间 ) def forward(self, x): x = self.features(x) return self.classifier(x) def visualize_features(self, x): # 前向传播获取特征图 features = self.features(x) # 通过反卷积重建图像 return self.decoder(features) for epoch in range(num_epochs): # 训练阶段 model.train() train_loss = 0.0 for images, labels in train_loader: images, labels = images.to(device), labels.to(device) optimizer.zero_grad() outputs = model(images) loss = criterion(outputs, labels) loss.backward() optimizer.step() train_loss += loss.item() * images.size(0) # 验证阶段 model.eval() val_loss = 0.0 correct = 0 total = 0 with torch.no_grad(): for images, labels in val_loader: images, labels = images.to(device), labels.to(device) outputs = model(images) loss = criterion(outputs, labels) val_loss += loss.item() * images.size(0) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() # 计算指标 train_loss = train_loss / len(train_dataset) val_loss = val_loss / len(val_dataset) val_acc = 100 * correct / total # 保存最佳模型 if val_acc > best_val_acc: best_val_acc = val_acc torch.save(model.state_dict(), 'best_model.pth') print(f'Epoch [{epoch+1}/{num_epochs}] | ' f'Train Loss: {train_loss:.4f} | ' f'Val Loss: {val_loss:.4f} | ' f'Val Acc: {val_acc:.2f}%') # 
在训练循环内(epoch循环结束后)添加: if (epoch+1) % 5 == 0: # 每5个epoch可视化一次 model.eval() with torch.no_grad(): # 获取验证集样本 sample_data, _ = next(iter(val_loader)) sample_data = sample_data.to(device) # 原始图像 save_image(sample_data, f'epoch_{epoch+1}_original.png') # 重建图像 reconstructed = model.visualize_features(sample_data) save_image(reconstructed, f'epoch_{epoch+1}_reconstructed.png')```原始图像和重建的图像为什么没有输出显示
03-27
import cv2 import numpy as np import openpyxl from openpyxl.utils import get_column_letter def process_image(image, wb, sheet, frame_count, scale_percent=0.35): try: # 图像预处理 height, width = image.shape[:2] new_width = int(width * scale_percent) new_height = int(height * scale_percent) resized = cv2.resize(image, (new_width, new_height), cv2.INTER_AREA) # HSV颜色空间转换 hsv = cv2.cvtColor(resized, cv2.COLOR_BGR2HSV) # 改进的红色检测范围 lower_red1 = np.array([0, 150, 100]) # 提高饱和度下限 upper_red1 = np.array([8, 255, 255]) lower_red2 = np.array([172, 150, 100]) # 缩小第二个范围 upper_red2 = np.array([180, 255, 255]) # 创建优化掩膜 mask1 = cv2.inRange(hsv, lower_red1, upper_red1) mask2 = cv2.inRange(hsv, lower_red2, upper_red2) mask = cv2.bitwise_or(mask1, mask2) # 改进的形态学操作 kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2) mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1) # 轮廓检测优化 contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if contours: # 筛选有效轮廓(面积>100) valid_contours = [c for c in contours if cv2.contourArea(c) > 100] if not valid_contours: return max_contour = max(valid_contours, key=cv2.contourArea) # 精确质心计算 M = cv2.moments(max_contour) if M["m00"] != 0: center_x = int(M["m10"] / M["m00"]) center_y = int(M["m01"] / M["m00"]) else: x, y, w, h = cv2.boundingRect(max_contour) center_x = x + w // 2 center_y = y + h // 2 # 亚像素级优化 gray_mask = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY) criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.01) corners = cv2.goodFeaturesToTrack(gray_mask, 1, 0.01, 10) if corners is not None: corners = cv2.cornerSubPix(gray_mask, corners, (3, 3), (-1, -1), criteria) center_x, center_y = int(corners[0][0][0]), int(corners[0][0][1]) # 数据记录 sheet.cell(row=frame_count + 2, column=1).value = frame_count sheet.cell(row=frame_count + 2, column=2).value = center_x sheet.cell(row=frame_count + 2, column=3).value = center_y # 可视化增强 
cv2.drawContours(resized, [max_contour], -1, (0, 255, 0), 2) cv2.circle(resized, (center_x, center_y), 5, (0, 0, 255), -1) cv2.putText(resized, f"({center_x}, {center_y})", (center_x + 10, center_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1) cv2.imshow('Precision Tracking', resized) cv2.waitKey(1) except Exception as e: print(f"Frame {frame_count} error: {str(e)}") def process_video(video_path, scale_percent=0.35): cap = cv2.VideoCapture(video_path) frame_count = 0 # 创建Excel文件并优化列宽 wb = openpyxl.Workbook() sheet = wb.active sheet.title = "Precision Tracking" headers = ["Frame", "Center X", "Center Y"] for col, header in enumerate(headers, 1): sheet.cell(row=1, column=col).value = header sheet.column_dimensions[get_column_letter(col)].width = 15 while cap.isOpened(): ret, frame = cap.read() if not ret: break process_image(frame, wb, sheet, frame_count, scale_percent) frame_count += 1 if frame_count % 50 == 0: print(f"Processed {frame_count} frames") # 保存优化 wb.save("precision_coordinates.xlsx") cap.release() cv2.destroyAllWindows() print(f"Total processed frames: {frame_count}") # 使用示例 if __name__ == "__main__": video_path = "1-4.mp4" process_video(video_path)无法捕捉到视频中的中心坐标
03-31
[ INFO:0@3.323] global F:\opencv2022\opencv4.5.5\sources\modules\core\src\utils\plugin_loader.impl.hpp (67) cv::plugin::impl::DynamicLib::libraryLoad load opencv_core_parallel_onetbb455_64d.dll => FAILED [ INFO:0@3.324] global F:\opencv2022\opencv4.5.5\sources\modules\core\src\utils\plugin_loader.impl.hpp (67) cv::plugin::impl::DynamicLib::libraryLoad load D:\op4.5.5_vs2022x64 with5\x64\vc17\bin\opencv_core_parallel_tbb455_64d.dll => FAILED [ INFO:0@3.325] global F:\opencv2022\opencv4.5.5\sources\modules\core\src\utils\plugin_loader.impl.hpp (67) cv::plugin::impl::DynamicLib::libraryLoad load opencv_core_parallel_tbb455_64d.dll => FAILED [ INFO:0@3.326] global F:\opencv2022\opencv4.5.5\sources\modules\core\src\utils\plugin_loader.impl.hpp (67) cv::plugin::impl::DynamicLib::libraryLoad load D:\op4.5.5_vs2022x64 with5\x64\vc17\bin\opencv_core_parallel_openmp455_64d.dll => FAILED [ INFO:0@3.327] global F:\opencv2022\opencv4.5.5\sources\modules\core\src\utils\plugin_loader.impl.hpp (67) cv::plugin::impl::DynamicLib::libraryLoad load opencv_core_parallel_openmp455_64d.dll => FAILED 原始图像尺寸: [8176 x 6132] 成功保存8位图像: D:/data1/shuchu/processed_img1.tif 成功保存8位图像: D:/data1/shuchu/processed_img2.tif ==== 解决全黑图像问题 ==== 有效区域1: [0 x 0 from (0, 0)] 有效区域2: [0 x 0 from (0, 0)] 警告: 校正后的左图可能是全黑的,尝试替代方案... 严重错误: 校正后图像仍全黑,使用原始图像作为后备 警告: 校正后的右图可能是全黑的,尝试替代方案... 严重错误: 校正后图像仍全黑,使用原始图像作为后备 成功保存8位图像: D:/data1/shuchu/rectified_left.tif 成功保存8位图像: D:/data1/shuchu/rectified_right.tif 已创建对齐验证图
最新发布
07-12
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值