Build Triton server without docker and deploy HuggingFace models on Google Colab platform
Environment
根据 Triton 环境对应表，Colab 环境缺少 tensorrt-8.6.1 和 cudnn9-cuda-12，triton-server 版本应该选择 r23.10。
# Install the build toolchain and the development libraries listed below.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to stay stable for scripted use. DEBIAN_FRONTEND=noninteractive
# keeps package configuration prompts from blocking a notebook cell.
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
  ca-certificates autoconf automake build-essential docker.io git libre2-dev libssl-dev libtool libboost-dev \
  libcurl4-openssl-dev libb64-dev patchelf python3-dev python3-pip python3-setuptools rapidjson-dev scons \
  software-properties-common unzip wget zlib1g-dev libarchive-dev pkg-config uuid-dev libnuma-dev curl \
  libboost-all-dev datacenter-gpu-manager cudnn9-cuda-12
# Upgrade pip, then install the Python packages used for the Triton client and
# the HuggingFace model tooling.
# Every requirement that contains shell metacharacters is single-quoted:
#   - unquoted `diffusers>=0.27.0` is parsed as `diffusers` plus a redirection
#     of stdout to a file named `=0.27.0`, so the version constraint is lost;
#   - unquoted `[...]` extras are glob patterns and may be expanded or rejected
#     depending on the shell and the files in the current directory.
pip3 install --upgrade pip &&
  pip3 install --upgrade \
    wheel \
    setuptools \
    'tritonclient[all]' \
    'diffusers>=0.27.0' \
    transformers \
    accelerate \
    safetensors \
    'optimum[onnxruntime]'
Upgrade Boost
# Download, build and install Boost 1.84.0 from source.
# - The archive is fetched from archives.boost.io, the Boost project's current
#   release host (the former boostorg.jfrog.io host has been retired).
# - Steps are chained with && so a failed download or build does not fall
#   through to the install step.
# - The build runs in a subshell so the caller's working directory is left
#   unchanged for the steps that follow.
# - No `chmod -R 777`: the extracted tree already carries the permissions it
#   needs, and world-writable sources are an unnecessary risk.
# - `b2 install` writes to the prefix chosen by bootstrap.sh (presumably
#   /usr/local by default), so it must run as a user who can write there.
wget https://archives.boost.io/release/1.84.0/source/boost_1_84_0.tar.gz &&
  tar -zxvf boost_1_84_0.tar.gz &&
  (
    cd boost_1_84_0 &&
      ./bootstrap.sh --with-libraries=all --with-toolset=gcc &&
      ./b2 -j"$(nproc)" toolset=gcc &&
      ./b2 install
  )
Install libarchive
# Download, build and install libarchive 3.6.2 from source.
# - Steps are chained with && so `sudo make install` never runs after a failed
#   download, configure or build.
# - The build runs in a subshell so the caller's working directory is left
#   unchanged.
# - make uses one job per available CPU instead of building serially.
# - ldconfig refreshes the dynamic linker cache so the newly installed shared
#   library can be resolved at run time.
wget https://github.com/libarchive/libarchive/releases/download/v3.6.2/libarchive-3.6.2.tar.gz &&
  tar -zxvf libarchive-3.6.2.tar.gz &&
  (
    cd libarchive-3.6.2 &&
      ./configure &&
      make -j"$(nproc)" &&
      sudo make install
  ) &&
  sudo ldconfig
Install TensorRT 8.6.1
# Method 1: install TensorRT 8.6.1 from the tar archive.
wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz &&
  tar -xvf TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz &&
  sudo mv TensorRT-8.6.1.6/ /usr/local/

# Make the TensorRT libraries visible to the dynamic linker.
# This replaces the interactive `vim ~/.bashrc` step, which cannot be used in
# a notebook cell, with a non-interactive append. The grep guard prevents the
# line from being added again on a re-run.
# ${LD_LIBRARY_PATH:+...} only emits the separator when the variable is already
# set; a leading ':' would add an empty entry, which the loader treats as the
# current directory.
trt_lib=/usr/local/TensorRT-8.6.1.6/lib
grep -qsF -- "$trt_lib" ~/.bashrc ||
  printf '%s\n' "export LD_LIBRARY_PATH=\"\${LD_LIBRARY_PATH:+\${LD_LIBRARY_PATH}:}${trt_lib}\"" >> ~/.bashrc

# Apply the same setting to the current shell directly. Sourcing ~/.bashrc is
# not reliable here because the stock Ubuntu file returns early in
# non-interactive shells.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${trt_lib}"
# Method 2: install TensorRT 8.6.1 from the local-repo Debian package.
wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/local_repos/nv-tensorrt-local-repo-ubuntu2204-8.6.1-cuda-12.0_1.0-1_amd64.deb
# Register the local repository first: /var/nv-tensorrt-local-repo-* is created
# by this package, so the keyring cannot be copied before dpkg has run.
sudo dpkg -i nv-tensorrt-local-repo-ubuntu2204-8.6.1-cuda-12.0_1.0-1_amd64.deb
# Trust the repository's signing key. The glob avoids hard-coding the key ID
# embedded in the keyring file name.
sudo cp /var/nv-tensorrt-local-repo-ubuntu2204-8.6.1-cuda-12.0/*-keyring.gpg /usr/share/keyrings/
# The .deb only adds a package source; TensorRT itself is installed from it.
sudo apt-get update
sudo apt-get install -y tensorrt
Building Triton server
编译 Triton
git clone -b r23.10 https://github.com/triton