1.在jetson板子上安装工具
sudo apt update
sudo apt install -y apt-utils
sudo apt install -y libglfw3 libglfw3-dev
sudo apt install -y cuda-toolkit-12-2 openmpi-bin libopenmpi-dev mpich libdrm-dev pkg-config libfreeimage-dev freeglut3-dev cmake
sudo apt install -y libopenblas-base libopenmpi-dev libomp-dev
sudo apt install -y libjpeg-dev zlib1g-dev libpython3-dev libopenblas-dev libavcodec-dev libavformat-dev libswscale-dev
sudo apt install -y python3 python3-dev python3-distutils python3-venv python3-pip
sudo apt install -y zlib1g software-properties-common lsb-release cmake build-essential libtool autoconf unzip htop ninja-build terminator zip
2. 下载cuda_samples源码 编译源码
$ cd ${HOME}
$ git clone -b v12.2 https://github.com/NVIDIA/cuda-samples.git
$ cd cuda-samples
$ make clean
$ make
3. 运行cuda_samples的test demo
$ cd ${HOME}/cuda-samples/bin/aarch64/linux/release
1.
$ ./bandwidthTest
2.
$ ./deviceQuery
3.
$ ./simpleGL
4.
$ ./boxFilter
5.
$ ./nbody
6.
$ ./smokeParticles
7.
$ ./particles
8.
$ ./FDTD3d
9.
$ ./simpleCUBLAS
10.
$ ./batchCUBLAS
11.
$ ./simpleCUFFT
12.
$ ./MersenneTwisterGP11213
有些测试用例需要在显示器上跑,所以不要在串口上运行
4.下载cudnn以及cudnn_sample
我这边用的版本是cudnn-8.9.0.131版本 对应cuda版本是12
$ cd ${HOME}
$ mkdir cudnn
$ cd cudnn
$ wget https://developer.download.nvidia.cn/compute/cudnn/redist/cudnn/linux-aarch64/cudnn-linux-aarch64-8.9.0.131_cuda12-archive.tar.xz
$ wget https://developer.download.nvidia.cn/compute/cudnn/redist/cudnn_samples/linux-aarch64/cudnn_samples-linux-aarch64-8.9.0.131_cuda12-archive.tar.xz
5.解压安装cudnn
$ sudo tar -xvf ${HOME}/cudnn/cudnn-linux-aarch64-8.9.0.131_cuda12-archive.tar.xz -C /usr/lib/aarch64-linux-gnu/ --strip-components=2 cudnn-linux-aarch64-8.9.0.131_cuda12-archive/lib/
$ sudo tar -xvf ${HOME}/cudnn/cudnn-linux-aarch64-8.9.0.131_cuda12-archive.tar.xz -C /usr/include --strip-components=2 cudnn-linux-aarch64-8.9.0.131_cuda12-archive/include/
$ grep -E "CUDNN_MAJOR|CUDNN_MINOR" /usr/include/cudnn_version.h
6.安装miniconda3
下载Miniconda3-latest-Linux-aarch64.sh后执行
$ bash ./Miniconda3-latest-Linux-aarch64.sh -b -p ${HOME}/miniconda3 -f
$ export PATH=${HOME}/miniconda3/bin:$PATH
$ conda init
初始化完后重启下设备
7. 安装pytorch
先执行下面搭建环境和安装必须工具
$ conda create -n torchenv python=3.10 pip
$ conda activate torchenv
$ pip install Cython
$ pip install "Numpy<2"
$ pip install pillow
下载torch-2.4.0a0+07cecf4168.nv24.05.14710581-cp310-cp310-linux_aarch64.whl
torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl
这2个安装包后本地安装
$ pip install /opt/ai/pytorch/torch-2.4.0a0+07cecf4168.nv24.05.14710581-cp310-cp310-linux_aarch64.whl
$ pip install /opt/ai/pytorch/torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl
安装完后可以测试了 写一个python代码文件
# Sanity-check script: verifies the locally-installed PyTorch wheel can see
# CUDA and cuDNN and can run a simple tensor computation on the GPU.
# NOTE: requires a CUDA-capable device (run on the Jetson, not a plain host).
import torch  # fixed typo: original read "iimport torch"

print(torch.__version__)
print('CUDA available: ' + str(torch.cuda.is_available()))
print('cuDNN version: ' + str(torch.backends.cudnn.version()))
# Zero-filled tensor allocated directly on the GPU. This legacy constructor
# emits a deprecation warning on PyTorch 2.x but still works.
a = torch.cuda.FloatTensor(2).zero_()
print('Tensor a = ' + str(a))
# Random tensor created on the CPU, then moved to the GPU.
b = torch.randn(2).cuda()
print('Tensor b = ' + str(b))
# Element-wise add on the GPU; since a is all zeros, c should equal b.
c = a + b
print('Tensor c = ' + str(c))
执行后,打印结果如下:
2.4.0a0+07cecf4168.nv24.05
CUDA available: True
cuDNN version: 8900
/opt/ai/test_torch.py:7: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/pytorch/pytorch/torch/csrc/tensor/python_tensor.cpp:78.)
a = torch.cuda.FloatTensor(2).zero_()
Tensor a = tensor([0., 0.], device='cuda:0')
Tensor b = tensor([0.9435, 1.4644], device='cuda:0')
Tensor c = tensor([0.9435, 1.4644], device='cuda:0')
安装成功!!