Couldn't open CUDA library cupti64_80.dll 如何解决？

最新推荐文章于 2024-12-06 15:04:26 发布

slbyzdgz

最新推荐文章于 2024-12-06 15:04:26 发布

阅读量3.9k

点赞数 11

CC 4.0 BY-SA版权

分类专栏：问题解决

本文链接：https://blog.youkuaiyun.com/qq_27318693/article/details/82849448

问题解决专栏收录该内容

4 篇文章

订阅专栏

本文解决了TensorFlow运行时找不到cupti64_80.dll的问题，详细步骤包括检查CUDA文件夹，确认cupti64_80.dll存在，调整环境变量，将该DLL复制到CUDA8.0/bin目录。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

tensorflow在运行时的错误：
... Couldn't open CUDA library cupti64_80.dll
... Non-OK-status: status_ status: Failed precondition: could not dlopen DSO: cupti64_80.dll; dlerror: cupti64_80.dll not found

解决方法：

先查看CUDA文件夹下有没有cupti64_80.dll，它一般位于 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\extras\CUPTI\libx64 目录下。打开该目录，可以看到需要的文件就在这里。

然后查看环境变量，发现那个文件所在的文件夹没有在环境变量中，但是CUDA8.0/bin却在这里。

再打开CUDA8.0/bin目录，可以看到其中并没有cupti64_80.dll：

所以，把cupti64_80.dll复制到CUDA8.0/bin就好了！

确定要放弃本次机会？

福利倒计时

: :

立减 ¥

普通VIP年卡可用

立即使用

slbyzdgz

关注关注

11
点赞
踩
8

收藏

觉得还不错? 一键收藏
3
评论
分享

复制链接

分享到 QQ

分享到新浪微博

扫一扫
举报

举报

专栏目录

《Python基础教程》内容总览篇（持续更新中）

weixin_43178406的博客

08-26

30万+

大家好，我是爱编程的喵喵。双985硕士毕业，现担任全栈工程师一职，热衷于将数据思维应用到工作与生活中。从事机器学习以及相关的前后端开发工作。曾在阿里云、科大讯飞、CCF等比赛获得多次Top名次。现为优快云博客专家、人工智能领域优质创作者。喜欢通过博客创作的方式对所学的知识进行总结与归纳，不仅形成深入且独到的理解，而且能够帮助新手快速入门。个人精心开设的《Python基础课程》专栏订阅量接近900，帮助不少同学解决了Bug。

CUDA ---- CUDA库简介

weixin_30924239的博客

06-21

2131

CUDA Libraries简介上图是CUDA 库的位置，本文简要介绍cuSPARSE、cuBLAS、cuFFT和cuRAND，之后会介绍OpenACC。 cuSPARSE线性代数库，主要针对稀疏矩阵之类的。 cuBLAS是CUDA标准的线代库，不过没有专门针对稀疏矩阵的操作。 cuFFT傅里叶变换 cuRAND随机数 CUDA库和CPU编程所用到的库没有什么区别，都是...

3 条评论您还未登录，请先登录后发表或查看评论

cublas64_80.dll

07-13

Visual Studio环境配置darknet运行缺少文件，无法执行

curand64_80.dll

07-13

Visual Studio环境配置darknet运行缺少文件，无法执行

FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot

TYKJ_ZF的博客

12-06

4411

针对Linux系统中onnxruntime-gpu安装后，无法调用CUDA_VISIBLE_DEVICES的问题进行总结

【PaddlePaddle】使用百度飞桨PaddlePaddle中遇到的问题汇总

Meet_Unknown的博客

12-22

3989

Windows10下安装PaddlePaddle，进行测试时报错，初以为是缺少所致，其实不然，而是 CUDA 版本、CUDNN 版本与 Paddlepaddle 版本不兼容导致，只要三者版本对应。

06-17

std::vector<int64_t>input_shape=tensor_info.GetShape();//打印input_shape，确保与你的输入数据维度一致```###步骤2:检查输入数据类型输入张量的数据类型必须与模型期望的数据类型一致。常见的类型有`ONNX_TENSOR...

"""Train and evaluate a two-class U-Net (InceptionV3 backbone) segmentation model.

Pipeline: load matching image/mask pairs from the train/val/test folders,
train with checkpointing and early stopping, plot and save the training
curves, then compute per-image metrics on the test set and save overlay
figures plus a metrics CSV.
"""
import csv
import os
import sys
from datetime import datetime

# Configure the environment once, *before* TensorFlow and segmentation_models
# are imported: TF_CPP_MIN_LOG_LEVEL is only honoured if it is set before the
# TensorFlow native library loads, and SM_FRAMEWORK selects the backend when
# segmentation_models is imported.
os.environ["SM_FRAMEWORK"] = "tf.keras"
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices=false'

import tensorflow as tf
import keras
from tensorflow.keras.optimizers import Adam

import segmentation_models as sm

sm.set_framework('tf.keras')  # make sure the tf.keras backend is used

import cv2
import numpy as np
from matplotlib import pyplot as plt
import matplotlib
# Use the tf.keras variants so every Keras object comes from the same backend
# that segmentation_models was told to use.
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import (
    f1_score,
    matthews_corrcoef,
    precision_score,
    recall_score,
    roc_auc_score,
)
import pandas as pd
from tqdm import tqdm
import gc

# Let matplotlib render Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False

# Full-precision computation.
tf.keras.mixed_precision.set_global_policy('float32')

# Enable GPU memory growth when a GPU is visible.
# NOTE(review): the data paths suggest native Windows. According to the
# TensorFlow install guide, TensorFlow 2.10 was the last release with GPU
# support on native Windows; with a newer release the list below is empty and
# everything runs on the CPU. Confirm the installed TensorFlow version and the
# CUDA/cuDNN versions it was built against.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("警告：未检测到 GPU，将使用 CPU 运行")

# Pick the device once instead of hard-coding '/GPU:0' where no GPU exists.
DEVICE = '/GPU:0' if gpus else '/CPU:0'

# Print environment information.
print("TensorFlow 版本:", tf.__version__)
print("segmentation_models 版本:", sm.__version__)
print("GPU 设备:", gpus)
print("CUDA 可用:", tf.test.is_built_with_cuda())
# NOTE: is_built_with_gpu_support() reports whether this TensorFlow build has
# GPU support; it does not specifically probe cuDNN.
print("cuDNN 可用:", tf.test.is_built_with_gpu_support())

# Random seeds.
np.random.seed(0)
tf.random.set_seed(0)

# Configuration.
n_classes = 2
SIZE = 1024  # image size taken from the first version of the code
base_directory = 'D:/pycharm/spark/data/data/data_1024_1024/'

# Paths of the already-split dataset.
train_directory = os.path.join(base_directory, 'train_data')
val_directory = os.path.join(base_directory, 'val_data')
test_directory = os.path.join(base_directory, 'test_data')
result_directory = 'D:/pycharm/spark/data/data/result3/'
checkpoint_directory = 'D:/pycharm/spark/data/data/duststorm/'
os.makedirs(result_directory, exist_ok=True)
os.makedirs(checkpoint_directory, exist_ok=True)

# Abort early when an expected image or mask folder is missing.
for directory in [train_directory, val_directory, test_directory]:
    img_dir = os.path.join(directory, 'mdgm1')
    mask_dir = os.path.join(directory, 'mask1')
    if not os.path.exists(img_dir) or not os.path.exists(mask_dir):
        print(f"错误：目录 {img_dir} 或 {mask_dir} 不存在")
        sys.exit(1)


class DataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of preprocessed images and one-hot masks.

    Args:
        image_paths: Image file paths.
        mask_paths: Mask file paths, aligned index-by-index with ``image_paths``.
        preprocess_input: Callable applied to the float32 image batch.
        batch_size: Number of samples per batch.
        size: Images and masks are resized to ``size`` x ``size``.
        n_classes: Number of classes for the one-hot encoding of the masks.
    """

    def __init__(self, image_paths, mask_paths, preprocess_input, batch_size=4,
                 size=SIZE, n_classes=2):
        super().__init__()
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.preprocess_input = preprocess_input
        self.batch_size = batch_size
        self.size = size
        self.n_classes = n_classes

    def __len__(self):
        """Return the number of batches (at least 1, even with no samples)."""
        return max(1, int(np.ceil(len(self.image_paths) / self.batch_size)))

    def __getitem__(self, idx):
        """Return ``(images, masks)`` for batch ``idx``.

        ``images`` is the preprocessed float32 batch; ``masks`` is the float32
        one-hot encoding of the binarised masks (any pixel > 0 becomes class 1).
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_image_paths = self.image_paths[start:stop]
        batch_mask_paths = self.mask_paths[start:stop]

        # Zero-initialised so that a sample whose files cannot be read becomes
        # an all-zero image with an all-background mask rather than
        # uninitialised memory (which could hold labels >= n_classes).
        images = np.zeros((len(batch_image_paths), self.size, self.size, 3),
                          dtype=np.float32)
        masks = np.zeros((len(batch_image_paths), self.size, self.size),
                         dtype=np.uint8)

        for i, (img_path, mask_path) in enumerate(zip(batch_image_paths,
                                                      batch_mask_paths)):
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                print(f"警告：无法加载图像 {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (self.size, self.size),
                             interpolation=cv2.INTER_NEAREST)
            images[i] = img

            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"警告：无法加载掩码 {mask_path}")
                continue
            mask = cv2.resize(mask, (self.size, self.size),
                              interpolation=cv2.INTER_NEAREST)
            masks[i] = np.where(mask > 0, 1, 0).astype(np.uint8)

        # Only reachable when the slice is empty (no samples in this batch).
        if not images.size or not masks.size:
            print(f"警告：批次 {idx} 为空，返回占位数据")
            return (
                np.zeros((1, self.size, self.size, 3), dtype=np.float32),
                np.zeros((1, self.size, self.size, self.n_classes),
                         dtype=np.float32),
            )

        images = self.preprocess_input(images)
        masks = to_categorical(masks, num_classes=self.n_classes).astype(np.float32)
        return images, masks


def is_image_file(filename):
    """Return True when ``filename`` has a supported image extension."""
    return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))


def load_paths(directory):
    """Collect image/mask pairs that share a file name under ``directory``.

    Returns:
        ``(image_paths, mask_paths, common_files)``, all sorted by file name.
    """
    img_dir = os.path.join(directory, 'mdgm1')
    mask_dir = os.path.join(directory, 'mask1')
    image_files = {f for f in os.listdir(img_dir) if is_image_file(f)}
    mask_files = {f for f in os.listdir(mask_dir) if is_image_file(f)}
    common_files = sorted(image_files & mask_files)
    image_paths = [os.path.join(img_dir, fname) for fname in common_files]
    mask_paths = [os.path.join(mask_dir, fname) for fname in common_files]
    print(f"{directory}：找到 {len(common_files)} 个匹配文件对")
    return image_paths, mask_paths, common_files


# Load the datasets.
train_image_paths, train_mask_paths, train_names = load_paths(train_directory)
val_image_paths, val_mask_paths, val_names = load_paths(val_directory)
test_image_paths, test_mask_paths, test_names = load_paths(test_directory)

# Create the data generators.
BACKBONE = 'inceptionv3'
preprocess_input = sm.get_preprocessing(BACKBONE)
train_generator = DataGenerator(train_image_paths, train_mask_paths,
                                preprocess_input, batch_size=4)
val_generator = DataGenerator(val_image_paths, val_mask_paths,
                              preprocess_input, batch_size=4)
test_generator = DataGenerator(test_image_paths, test_mask_paths,
                               preprocess_input, batch_size=4)

# Model parameters.
activation = 'sigmoid'
LR = 1e-5  # learning rate taken from the second version of the code
optim = Adam(LR, clipnorm=1.0)
metrics = [sm.metrics.IOUScore(threshold=0.5), sm.metrics.FScore(threshold=0.5)]
loss = (sm.losses.BinaryFocalLoss(alpha=0.25, gamma=2.0)
        + sm.losses.DiceLoss(class_weights=[0.2, 0.8]))

# Build the model: local encoder weights if present, otherwise ImageNet
# weights, and random initialisation as the last resort.
weights_path = 'D:/pycharm/spark/data/data/inceptionv3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
try:
    if not os.path.exists(weights_path):
        print(f"错误：权重文件 {weights_path} 不存在，使用 ImageNet 权重")
        model = sm.Unet(BACKBONE, encoder_weights='imagenet',
                        classes=n_classes, activation=activation)
    else:
        model = sm.Unet(BACKBONE, encoder_weights=weights_path,
                        classes=n_classes, activation=activation)
except Exception as e:  # deliberate best-effort fallback
    print(f"加载权重失败: {e}，使用随机初始化")
    model = sm.Unet(BACKBONE, encoder_weights=None,
                    classes=n_classes, activation=activation)

# Compile the model.
with tf.device(DEVICE):
    model.compile(optim, loss=loss, metrics=metrics)
model.summary()  # summary() prints by itself; wrapping it in print() adds "None"

# Callbacks.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
checkpoint_filepath = f'{checkpoint_directory}/train_MY30_31_inceptionv3_{timestamp}.keras'
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='val_iou_score',
        mode='max',
        save_best_only=True,
        verbose=1,
    ),
    EarlyStopping(
        monitor='val_iou_score',
        mode='max',
        patience=5,
        restore_best_weights=True,
        verbose=1,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-6,
        verbose=1,
    ),
]

# Train the model.
try:
    with tf.device(DEVICE):
        history = model.fit(
            train_generator,
            epochs=50,
            validation_data=val_generator,
            callbacks=callbacks,
            verbose=1,
        )
except Exception as e:
    print(f"训练失败: {e}")
    sys.exit(1)


def plot_metrics(history, metric_name, title, ylabel):
    """Plot the training and validation curves of ``metric_name`` per epoch."""
    metric = history.history[metric_name]
    val_metric = history.history[f'val_{metric_name}']
    epochs = range(1, len(metric) + 1)
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, metric, 'y', label=f'训练 {ylabel}')
    plt.plot(epochs, val_metric, 'r', label=f'验证 {ylabel}')
    plt.title(title)
    plt.xlabel('轮次')
    plt.ylabel(ylabel)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()


plot_metrics(history, 'loss', '训练和验证损失 train_MY30_31_inceptionv3', '损失')
plot_metrics(history, 'iou_score', '训练和验证 IoU train_MY30_31_inceptionv3', 'IoU')

# Save the per-epoch training metrics. The encoding is explicit because the
# header is Chinese and the platform default encoding may not be able to
# represent it.
with open(f'{checkpoint_directory}/train_MY30_31_inceptionv3_{timestamp}.csv',
          'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['训练损失', '验证损失', '训练 IoU', '验证 IoU'])
    writer.writerows(zip(history.history['loss'],
                         history.history['val_loss'],
                         history.history['iou_score'],
                         history.history['val_iou_score']))


def calculate_iou(pred_mask, gt_mask):
    """Return intersection-over-union of two masks (1.0 when both are empty)."""
    intersection = np.logical_and(pred_mask, gt_mask).sum()
    union = np.logical_or(pred_mask, gt_mask).sum()
    return intersection / union if union != 0 else 1.0


def pixel_accuracy(y_true, y_pred):
    """Return the fraction of pixels where rounded ``y_pred`` equals ``y_true``."""
    # Cast both sides to one dtype: the callers pass a uint8 mask and an int64
    # argmax result, and tf.equal needs matching dtypes.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.round(tf.cast(y_pred, tf.float32))
    correct_pixels = tf.reduce_sum(tf.cast(tf.equal(y_true, y_pred), tf.float32))
    total_pixels = tf.cast(tf.size(y_true), tf.float32)
    return (correct_pixels / total_pixels).numpy()


def pixel_accuracy_for_class(y_true, y_pred, class_id):
    """Return the share of ``class_id`` pixels in ``y_true`` that were predicted
    as ``class_id``; 0.0 when ``y_true`` contains no such pixel."""
    y_pred_rounded = tf.cast(tf.round(y_pred), tf.int32)
    true_mask = tf.equal(y_true, class_id)
    pred_mask = tf.equal(y_pred_rounded, class_id)
    correct_pixels_class = tf.reduce_sum(
        tf.cast(tf.logical_and(true_mask, pred_mask), tf.float32))
    total_pixels_class = tf.reduce_sum(tf.cast(true_mask, tf.float32))
    if total_pixels_class != 0:
        return (correct_pixels_class / total_pixels_class).numpy()
    return 0.0


# Test-set prediction and evaluation.
metrics_dict = {
    '文件名': [],
    'IoU 分数': [],
    'AUC 分数': [],
    'F1 分数': [],
    'F1 微平均': [],
    'F1 宏平均': [],
    '精确率 (宏平均)': [],
    '召回率 (宏平均)': [],
    '像素精度 (整体)': [],
    '像素精度 (尘暴)': [],
    'MCC': [],
}

for img_path, mask_path, test_name in tqdm(
        zip(test_image_paths, test_mask_paths, test_names),
        total=len(test_image_paths), desc="测试集预测"):
    test_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if test_img is None:
        print(f"警告：无法加载测试图像 {img_path}")
        continue
    test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
    test_img = cv2.resize(test_img, (SIZE, SIZE), interpolation=cv2.INTER_NEAREST)
    test_img_input = preprocess_input(np.expand_dims(test_img, 0))

    test_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if test_mask is None:
        print(f"警告：无法加载测试掩码 {mask_path}")
        continue
    test_mask = cv2.resize(test_mask, (SIZE, SIZE), interpolation=cv2.INTER_NEAREST)
    test_mask = np.where(test_mask > 0, 1, 0).astype(np.uint8)

    with tf.device(DEVICE):
        test_pred = model.predict(test_img_input, verbose=0)
    test_prediction = np.argmax(test_pred, axis=3)[0, :, :]

    y_true = test_mask.flatten()
    y_pred = test_prediction.flatten()

    iou = calculate_iou(test_prediction, test_mask)
    # ROC AUC is undefined when the ground truth holds a single class (for
    # example an all-background mask); record NaN instead of aborting the loop.
    if np.unique(y_true).size < 2:
        auc_score = float('nan')
    else:
        auc_score = roc_auc_score(y_true, test_pred[0, :, :, 1].flatten())
    f1 = f1_score(y_true, y_pred, average='binary', zero_division=1)
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=1)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=1)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    pa = pixel_accuracy(test_mask, test_prediction)
    pa_dust = pixel_accuracy_for_class(test_mask, test_prediction, 1)
    mcc = matthews_corrcoef(y_true, y_pred)

    metrics_dict['文件名'].append(test_name)
    metrics_dict['IoU 分数'].append(iou)
    metrics_dict['AUC 分数'].append(auc_score)
    metrics_dict['F1 分数'].append(f1)
    metrics_dict['F1 微平均'].append(f1_micro)
    metrics_dict['F1 宏平均'].append(f1_macro)
    metrics_dict['精确率 (宏平均)'].append(precision)
    metrics_dict['召回率 (宏平均)'].append(recall)
    metrics_dict['像素精度 (整体)'].append(pa)
    metrics_dict['像素精度 (尘暴)'].append(pa_dust)
    metrics_dict['MCC'].append(mcc)

    # Visualisation: image, image + ground truth, prediction + ground truth.
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    plt.title('图像')
    plt.imshow(test_img)
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.title('图像 + 掩码')
    plt.imshow(test_img)
    plt.imshow(test_mask, cmap='jet', alpha=0.5)
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.title('预测 + 掩码 + IoU')
    plt.imshow(test_prediction, cmap='gray')
    plt.imshow(test_mask, cmap='jet', alpha=0.5)
    plt.text(0.5, -0.1, f'IoU: {iou:.3f}', ha='center', va='center',
             transform=plt.gca().transAxes, fontsize=10)
    plt.axis('off')

    plt.savefig(os.path.join(result_directory, f'test_{test_name}'),
                bbox_inches='tight')
    plt.close()

# Save the test metrics.
pd.DataFrame(metrics_dict).to_csv(
    f'{checkpoint_directory}/test_MY30_31_inceptionv3_metrics_{timestamp}.csv',
    index=False)

# Original poster's question: the script runs now, but it runs on the CPU
# instead of the GPU.