环境:Windows 8.1(32 位)、Visual Studio 2013、CUDA 6.5。
安装好 CUDA 6.5 后,按照网上的教程配置了系统环境变量;随后直接新建了一个 CUDA 项目(如下图),
其中默认生成的 kernel.cu 程序可以成功编译并运行。
但是,我把该程序改为调用 cuSPARSE 库之后就出现了问题,修改后的代码如下:
#include"cusparse.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include<math.h>
/*
 * CLEANUP(s): print the message `s`, then release everything main() may own.
 *
 * The macro expands in place, so the following names must be in scope at the
 * expansion site: host_crsRowPtrA, host_crsColIndA, host_crsValA, host_y,
 * host_x (released with free), crsRowPtrA, crsColIndA, crsValA, y, x
 * (released with cudaFree), descr and handle (cuSPARSE objects).
 * Each resource is released only when its variable is non-zero, so the macro
 * can be used at any point after the variables have been zero-initialised.
 * It finishes by resetting the device and flushing stdout.
 */
#define CLEANUP(s)                                              \
    do {                                                        \
        printf("%s\n", s);                                      \
        if (host_crsRowPtrA) { free(host_crsRowPtrA); }         \
        if (host_crsColIndA) { free(host_crsColIndA); }         \
        if (host_crsValA)    { free(host_crsValA); }            \
        if (crsRowPtrA)      { cudaFree(crsRowPtrA); }          \
        if (crsColIndA)      { cudaFree(crsColIndA); }          \
        if (crsValA)         { cudaFree(crsValA); }             \
        if (host_y)          { free(host_y); }                  \
        if (y)               { cudaFree(y); }                   \
        if (host_x)          { free(host_x); }                  \
        if (x)               { cudaFree(x); }                   \
        if (descr)           { cusparseDestroyMatDescr(descr); }\
        if (handle)          { cusparseDestroy(handle); }       \
        cudaDeviceReset();                                      \
        fflush(stdout);                                         \
    } while (0)
/*
 * Builds a small 3x3 sparse matrix in CSR form on the host, uploads it to the
 * device together with a right-hand side, and runs the legacy cuSPARSE
 * triangular solve (cusparseDcsrsv_analysis + cusparseDcsrsv_solve).
 * The solution is copied back to the host and printed.
 *
 * Returns 0 on success and 1 on any allocation, copy or cuSPARSE failure.
 * Every exit path releases the resources acquired so far.
 *
 * Linking: the program must be linked against cusparse.lib in addition to the
 * CUDA runtime, otherwise every cusparse* symbol is unresolved (LNK2019).
 */
int main()
{
    /* CSR description of the sample matrix (zero-based indices). */
    static const double valA[]    = { 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, -1.0, -3.0 };
    static const int    rowPtrA[] = { 0, 3, 6, 8 };
    static const int    colIndA[] = { 0, 1, 2, 0, 1, 2, 0, 1 };
    static const double rhs[]     = { 0.0, 3.0, 2.0 };

    /* Scalar multiplier for the solve. It lives on the host, which matches
     * the default cuSPARSE pointer mode. The original code wrote through a
     * null pointer here. */
    const double alpha = 1.0;

    cudaError_t cudaStat1, cudaStat2, cudaStat3, cudaStat4, cudaStat5;
    cusparseStatus_t status;
    cusparseHandle_t handle = 0;
    cusparseMatDescr_t descr = 0;
    cusparseSolveAnalysisInfo_t info = 0;
    int * host_crsRowPtrA = 0;
    int * host_crsColIndA = 0;
    double * host_crsValA = 0;
    int * crsRowPtrA = 0;
    int * crsColIndA = 0;
    double * crsValA = 0;
    double *host_y = 0;   /* right-hand side (host) */
    double *y = 0;        /* right-hand side (device) */
    double *host_x = 0;   /* solution (host) */
    double *x = 0;        /* solution (device) */
    int n = 3;
    int nnz = 8;
    int i;

    /* ---- host buffers ---- */
    host_crsRowPtrA = (int *)malloc((n + 1) * sizeof(host_crsRowPtrA[0]));
    host_crsColIndA = (int *)malloc(nnz * sizeof(host_crsColIndA[0]));
    host_crsValA = (double *)malloc(nnz * sizeof(host_crsValA[0]));
    host_y = (double *)malloc(n * sizeof(host_y[0]));
    host_x = (double *)malloc(n * sizeof(host_x[0]));
    if ((!host_crsRowPtrA) || (!host_crsColIndA) ||
        (!host_crsValA) || (!host_y) || (!host_x)) {
        CLEANUP("Host malloc failed (matrix)");
        return 1;
    }

    for (i = 0; i < nnz; ++i) {
        host_crsValA[i] = valA[i];
        host_crsColIndA[i] = colIndA[i];
    }
    for (i = 0; i <= n; ++i) {
        host_crsRowPtrA[i] = rowPtrA[i];
    }
    for (i = 0; i < n; ++i) {
        host_y[i] = rhs[i];
        host_x[i] = 0.0;
    }

    /* ---- device buffers ---- */
    cudaStat1 = cudaMalloc((void**)&crsValA, nnz * sizeof(crsValA[0]));
    cudaStat2 = cudaMalloc((void**)&crsRowPtrA, (n + 1) * sizeof(crsRowPtrA[0]));
    cudaStat3 = cudaMalloc((void**)&crsColIndA, nnz * sizeof(crsColIndA[0]));
    cudaStat4 = cudaMalloc((void**)&y, n * sizeof(y[0]));
    cudaStat5 = cudaMalloc((void**)&x, n * sizeof(x[0]));
    if ((cudaStat1 != cudaSuccess) ||
        (cudaStat2 != cudaSuccess) ||
        (cudaStat3 != cudaSuccess) ||
        (cudaStat4 != cudaSuccess) ||
        (cudaStat5 != cudaSuccess)) {
        CLEANUP("Device malloc failed");
        return 1;
    }

    cudaStat1 = cudaMemcpy(crsRowPtrA, host_crsRowPtrA,
                           (size_t)((n + 1) * sizeof(crsRowPtrA[0])),
                           cudaMemcpyHostToDevice);
    cudaStat2 = cudaMemcpy(crsColIndA, host_crsColIndA,
                           (size_t)(nnz * sizeof(crsColIndA[0])),
                           cudaMemcpyHostToDevice);
    cudaStat3 = cudaMemcpy(crsValA, host_crsValA,
                           (size_t)(nnz * sizeof(crsValA[0])),
                           cudaMemcpyHostToDevice);
    cudaStat4 = cudaMemcpy(y, host_y,
                           (size_t)(n * sizeof(y[0])),
                           cudaMemcpyHostToDevice);
    if ((cudaStat1 != cudaSuccess) ||
        (cudaStat2 != cudaSuccess) ||
        (cudaStat3 != cudaSuccess) ||
        (cudaStat4 != cudaSuccess)) {
        CLEANUP("Memcpy from Host to Device failed");
        return 1;
    }

    /* initialize cusparse library */
    status = cusparseCreate(&handle);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        CLEANUP("CUSPARSE Library initialization failed");
        return 1;
    }

    /* create and setup matrix descriptor */
    status = cusparseCreateMatDescr(&descr);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        CLEANUP("Matrix descriptor initialization failed");
        return 1;
    }
    status = cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
    if (status == CUSPARSE_STATUS_SUCCESS) {
        status = cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
    }
    if (status != CUSPARSE_STATUS_SUCCESS) {
        CLEANUP("Matrix descriptor setup failed");
        return 1;
    }

    /* The analysis info object must be created before it is handed to the
     * analysis/solve calls; the original code passed a null handle. */
    status = cusparseCreateSolveAnalysisInfo(&info);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        CLEANUP("Solve analysis info creation failed");
        return 1;
    }

    /* NOTE(review): csrsv is a *triangular* solver, and which triangle it
     * uses is taken from the descriptor's fill mode (presumably lower by
     * default -- confirm). The sample matrix above is not triangular, and
     * row 2 has no stored diagonal entry, so the result may be inf/NaN.
     * Confirm the intended matrix before relying on the output. */
    status = cusparseDcsrsv_analysis(
        handle,
        CUSPARSE_OPERATION_NON_TRANSPOSE,
        n,
        nnz,
        descr,
        crsValA,
        crsRowPtrA,
        crsColIndA,
        info);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        /* CLEANUP does not know about info, so release it here first. */
        cusparseDestroySolveAnalysisInfo(info);
        CLEANUP("cusparseDcsrsv_analysis failed");
        return 1;
    }

    /* Solve op(A) * x = alpha * y, with y as the right-hand side. */
    status = cusparseDcsrsv_solve(
        handle,
        CUSPARSE_OPERATION_NON_TRANSPOSE,
        n,
        &alpha,
        descr,
        crsValA,
        crsRowPtrA,
        crsColIndA,
        info,
        y,
        x);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        cusparseDestroySolveAnalysisInfo(info);
        CLEANUP("cusparseDcsrsv_solve failed");
        return 1;
    }

    /* Bring the solution back to the host; the blocking copy also waits for
     * the solve to finish. */
    cudaStat1 = cudaMemcpy(host_x, x,
                           (size_t)(n * sizeof(x[0])),
                           cudaMemcpyDeviceToHost);
    if (cudaStat1 != cudaSuccess) {
        cusparseDestroySolveAnalysisInfo(info);
        CLEANUP("Memcpy from Device to Host failed");
        return 1;
    }

    for (i = 0; i < n; ++i) {
        printf("x[%d] = %f\n", i, host_x[i]);
    }

    cusparseDestroySolveAnalysisInfo(info);
    CLEANUP("Done");
    return 0;
}
编译链接时出现的错误如下:
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseCreate@4,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseDestroy@4,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseCreateMatDescr@4,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseDestroyMatDescr@4,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseSetMatType@8,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseSetMatIndexBase@8,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseDcsrsv_analysis@36,该符号在函数 _main 中被引用
1>kernel.cu.obj : error LNK2019: 无法解析的外部符号 _cusparseDcsrsv_solve@44,该符号在函数 _main 中被引用
1>F:\C_program\test3_cuda\Debug\test3_cuda.exe : fatal error LNK1120: 8 个无法解析的外部命令
原因:这些 LNK2019 错误是因为项目没有链接 cuSPARSE 的导入库(cusparse.lib),需要在“项目属性 → 链接器 → 输入 → 附加依赖项”中加入 cusparse.lib。按照 http://www.cnblogs.com/scut-fm/p/3708723.html (CUDA 6.0 安装及配置( WIN7 64位 / 英伟达G卡 / VS2010 ))中第十三步及之后的步骤进行配置后,问题即可解决。