基于昇腾设备的镜像打包流程

基于昇腾设备的镜像打包流程

Dockerfile文件内容如下:

# Base image; it can be downloaded from https://www.hiascend.com/developer/ascendhub
FROM swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-infer:24.0.RC3-ubuntu20.04 

# Switch to the root user so the build steps below run with root privileges
USER root

# Use /tmp as the working directory for the build steps below
WORKDIR /tmp

# Refresh the package index and install the required OS packages in one layer,
# then drop the apt caches in that same layer so they do not end up in the image.
# - apt-get is used instead of apt because apt has no stable scripting interface.
# - DEBIAN_FRONTEND=noninteractive is set inline (not via ENV) so package
#   configuration prompts cannot block the build and it does not leak into runtime.
# - ca-certificates, openssl and xz-utils are listed explicitly because later
#   steps download over HTTPS with wget, unpack a .tar.xz archive and call
#   `openssl passwd`; --no-install-recommends does not guarantee they are present.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    ca-certificates \
    curl \
    ffmpeg \
    gcc \
    libbz2-dev \
    libc6-dev \
    libffi-dev \
    libgdbm-dev \
    liblzma-dev \
    libncurses5-dev \
    libnss3-dev \
    libreadline-dev \
    libsqlite3-dev \
    libssl-dev \
    make \
    openssl \
    pkg-config \
    tk-dev \
    unzip \
    vim \
    wget \
    xz-utils \
    zlib1g-dev && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Copy local files from the build context into the image:
# the dcmi directory, the npu-smi binary, the driver lib64 "common" and "driver"
# directories, the two /etc configuration files and the ascend-toolkit directory.
COPY local/dcmi /usr/local/dcmi
COPY local/bin/npu-smi /usr/local/bin/npu-smi
COPY local/Ascend/driver/lib64/common /usr/local/Ascend/driver/lib64/common
COPY local/Ascend/driver/lib64/driver /usr/local/Ascend/driver/lib64/driver
COPY ascend_install.info /etc/ascend_install.info
COPY vnpu.cfg /etc/vnpu.cfg
COPY ascend-toolkit /usr/local/Ascend/ascend-toolkit

# Set environment variables.
# LD_LIBRARY_PATH must contain the Python lib directory because Python is built
# with --enable-shared below; it is set before the build step so the freshly
# installed interpreter can already run pip there.
# ${VAR:+:$VAR} appends the inherited value only when it is non-empty. This
# avoids a trailing ':' (an empty entry means "current directory" to the dynamic
# loader) and keeps any PATH entries the base image already defines.
ENV LD_LIBRARY_PATH=/usr/local/python3.10.16/lib:/usr/lib/aarch64-linux-gnu/hdf5/serial${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} \
    PATH=/usr/local/python3.10.16/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin${PATH:+:$PATH}


# Build and install Python 3.10.16 from source, all in one layer:
# 1. download and unpack the source tarball into the current working directory;
# 2. configure with a dedicated prefix and a shared libpython, build in parallel
#    on all available cores, and install;
# 3. point /usr/bin/python, python3, pip and pip3 at the new installation;
# 4. remove the sources in the same layer so they do not stay in the image;
# 5. write root's pip configuration to use the Aliyun PyPI mirror over HTTPS
#    (TLS is verified, so no trusted-host override is needed) and upgrade pip
#    without keeping a download cache.
RUN wget https://repo.huaweicloud.com/python/3.10.16/Python-3.10.16.tar.xz && \
    tar -xf Python-3.10.16.tar.xz && cd Python-3.10.16 && \
    ./configure --prefix=/usr/local/python3.10.16 --enable-shared && \
    make -j"$(nproc)" && make install && \
    ln -sf /usr/local/python3.10.16/bin/python3 /usr/bin/python3 && \
    ln -sf /usr/local/python3.10.16/bin/python3 /usr/bin/python && \
    ln -sf /usr/local/python3.10.16/bin/pip3 /usr/bin/pip3 && \
    ln -sf /usr/local/python3.10.16/bin/pip3 /usr/bin/pip && \
    cd .. && rm -rf Python* && \
    mkdir -p ~/.pip && \
    echo '[global]' > ~/.pip/pip.conf && \
    echo 'index-url=https://mirrors.aliyun.com/pypi/simple/' >> ~/.pip/pip.conf && \
    pip3 install --no-cache-dir --upgrade pip

# Install the Python dependencies.
# --no-cache-dir keeps pip from writing its download cache into this layer;
# the trailing rm additionally removes any cache left behind by earlier steps.
# NOTE(review): tqdm and PyYAML are not version-pinned, unlike the other
# packages — consider pinning them for reproducible builds.
RUN pip3 install --no-cache-dir -U pip && \
    pip3 install --no-cache-dir \
    websockets==14.1 \
    torch==2.1.0 \
    onnxruntime==1.20.1 \
    onnx==1.17.0 \
    onnxconverter-common==1.14.0 \
    torch-npu==2.1.0.post10 \
    torchaudio==2.1.0 \
    pymilvus==2.5.2 \
    PyMySQL==1.1.1 \
    redis==5.2.1 \
    Flask==3.0.3 \
    loguru==0.7.2 \
    tqdm \
    PyYAML \
    librosa==0.10.2.post1 \
    pydub==0.25.1 \
    sentencepiece==0.2.0 \
    cryptography==43.0.0 \
    hydra-core==1.3.2 \
    requests==2.31.0 \
    kaldiio==2.18.0 \
    gunicorn==22.0.0 && \
    rm -rf /root/.cache/pip

# Provide the aarch64 dynamic-loader link under /lib64 so that tools coming
# from a Euler host do not fail. Nothing is done when /lib64 already exists
# as a directory.
RUN [ -d "/lib64" ] || \
    { mkdir /lib64 && ln -sf /lib/ld-linux-aarch64.so.1 /lib64/ld-linux-aarch64.so.1; }

# Clean up temporary files under /tmp.
# NOTE(review): files deleted in a separate RUN step are only hidden in the
# final filesystem; layers created earlier keep their size.
RUN rm -rf /tmp/*

# Ensure the HwHiAiUser account exists with password HwHiAiUser, group
# HwHiAiUser, and user ID / group ID both 1001 (business requirement).
# - If the user already exists, its UID and its group's GID are changed to 1001.
# - Otherwise group 1001 is created when missing, then the user is created.
# Written as an explicit if/else with POSIX redirection: the default RUN shell
# is /bin/sh, where `&>` is not a redirection (it backgrounds the command), and
# an `A && B || C` chain would also run the create branch after a failed modify.
# NOTE(review): the password is baked into the image and hashed with MD5-crypt
# (`openssl passwd -1`); confirm this is acceptable.
RUN if id -u HwHiAiUser >/dev/null 2>&1; then \
        usermod -u 1001 HwHiAiUser && \
        groupmod -g 1001 HwHiAiUser; \
    else \
        { getent group 1001 >/dev/null 2>&1 || groupadd -g 1001 HwHiAiUser; } && \
        useradd -m -d /home/HwHiAiUser -s /bin/bash -u 1001 -g 1001 \
            -p "$(openssl passwd -1 HwHiAiUser)" HwHiAiUser; \
    fi

# Switch to the HwHiAiUser user; later instructions and the container run as this user
USER HwHiAiUser

# Change the working directory to the project folder
WORKDIR /app/cmbchina-voiceprint-recognition-service

# Copy the project-related files (the dguard_models directory) into the image
COPY dguard_models /app/dguard_models

# Make the Ascend toolkit environment available at runtime.
# Executing set_env.sh inside a RUN step has no lasting effect: variables
# exported there disappear when that build shell exits. Instead, source the
# script from the user's .bashrc so interactive bash sessions pick it up.
# NOTE(review): non-interactive commands (e.g. `docker run <image> python main.py`)
# do not read .bashrc and must source set_env.sh themselves — confirm how the
# service is started.
RUN echo 'source /usr/local/Ascend/ascend-toolkit/set_env.sh' >> /home/HwHiAiUser/.bashrc

# Set environment variables (works around a runtime error):
# - LD_PRELOAD preloads the libgomp library located in the user-site
#   scikit_learn.libs directory, ahead of any inherited LD_PRELOAD value.
# - DGUARD_MODEL_PATH is the directory the dguard_models files were copied to.
# NOTE(review): pip runs as root earlier in this file, so nothing here installs
# packages under /home/HwHiAiUser/.local — confirm this library path exists in
# the final image, and that the hashed file name matches the installed version.
ENV LD_PRELOAD=/home/HwHiAiUser/.local/lib/python3.10/site-packages/scikit_learn.libs/libgomp-d22c30c5.so.1.0.0:$LD_PRELOAD \
    DGUARD_MODEL_PATH=/app/dguard_models

容器启动命令

# Start the container in the background with a TTY, on the host network,
# limited to 4 CPUs. The NPU device nodes are passed through and the driver
# tools, libraries and configuration files are bind-mounted from the host.
sudo docker run \
    --interactive --tty --detach \
    --name ascend-test \
    --network host \
    --cpus 4 \
    --device /dev/davinci0 \
    --device /dev/davinci_manager \
    --device /dev/devmm_svm \
    --device /dev/hisi_hdc \
    --volume /usr/local/dcmi:/usr/local/dcmi \
    --volume /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    --volume /usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/common \
    --volume /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/driver \
    --volume /etc/ascend_install.info:/etc/ascend_install.info \
    --volume /etc/vnpu.cfg:/etc/vnpu.cfg \
    --volume /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
    longyuan-vpr-ascend-test:v1.0


# Start an interactive, throw-away container (removed on exit) on the host
# network, limited to 4 CPUs, with the NPU device nodes passed through and the
# project directory bind-mounted from the host.
sudo docker run \
    --interactive --tty \
    --rm \
    --name ascend-test \
    --network host \
    --cpus 4 \
    --device /dev/davinci0 \
    --device /dev/davinci_manager \
    --device /dev/devmm_svm \
    --device /dev/hisi_hdc \
    --volume /home/blgx/app/cmbchina-voiceprint-recognition-service:/app/cmbchina-voiceprint-recognition-service \
    longyuan-vpr-ascend-test:v1.0

报错记录

Traceback (most recent call last):
  File "/usr/local/python3.10.16/lib/python3.10/site-packages/torch_npu/__init__.py", line 17, in <module>
    import torch_npu.npu
  File "/usr/local/python3.10.16/lib/python3.10/site-packages/torch_npu/npu/__init__.py", line 114, in <module>
    from torch_npu.utils import _should_print_warning
  File "/usr/local/python3.10.16/lib/python3.10/site-packages/torch_npu/utils/__init__.py", line 1, in <module>
    from torch_npu import _C
ImportError: libhccl.so: cannot open shared object file: No such file or directory

# 解决
# 要想运行 torch_npu 库,仍然需要安装 ascend-toolkit;可以直接从宿主机将该文件夹拷贝(cp)到 /usr/local/Ascend 路径下
HwHiAiUser@blgx:/app/cmbchina-voiceprint-recognition-service$ python main.py
Traceback (most recent call last):
  File "/app/cmbchina-voiceprint-recognition-service/main.py", line 12, in <module>
    from pipeline.blacklist_pipeline import blacklist_register, blacklist_delete, blacklist_compare
  File "/app/cmbchina-voiceprint-recognition-service/pipeline/__init__.py", line 2, in <module>
    from dguard import DguardModel as dm
  File "/app/cmbchina-voiceprint-recognition-service/dguard/__init__.py", line 3, in <module>
    from dguard.interface.client import WebSocketClient as DguardClient
  File "/app/cmbchina-voiceprint-recognition-service/dguard/interface/client.py", line 22, in <module>
    from dguard.speaker.diar.extract_emb import subsegment
  File "/app/cmbchina-voiceprint-recognition-service/dguard/speaker/__init__.py", line 1, in <module>
    from dguard.speaker.cli.speaker import load_model  # noqa
  File "/app/cmbchina-voiceprint-recognition-service/dguard/speaker/cli/speaker.py", line 31, in <module>
    from dguard.speaker.diar.spectral_clusterer import cluster
  File "/app/cmbchina-voiceprint-recognition-service/dguard/speaker/diar/spectral_clusterer.py", line 29, in <module>
    from sklearn.cluster._kmeans import k_means
  File "/home/HwHiAiUser/.local/lib/python3.10/site-packages/sklearn/__init__.py", line 97, in <module>
    from .utils._show_versions import show_versions
  File "/home/HwHiAiUser/.local/lib/python3.10/site-packages/sklearn/utils/_show_versions.py", line 15, in <module>
    from ._openmp_helpers import _openmp_parallelism_enabled
ImportError: /home/HwHiAiUser/.local/lib/python3.10/site-packages/sklearn/utils/../../scikit_learn.libs/libgomp-d22c30c5.so.1.0.0: cannot allocate memory in static TLS block


# Fix: preload the libgomp library named in the ImportError above
# ("cannot allocate memory in static TLS block"), ahead of any existing LD_PRELOAD value
export LD_PRELOAD=/home/HwHiAiUser/.local/lib/python3.10/site-packages/scikit_learn.libs/libgomp-d22c30c5.so.1.0.0:$LD_PRELOAD


评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值