# RecSDK PyTorch Runtime and Build Image
# Configure the base image below to match your environment.
FROM ubuntu:22.04
# /tmp is the working directory for every relative path used by the RUN steps below
# (downloaded archives, source trees, installers).
WORKDIR /tmp
# Make package installation non-interactive.
# NOTE(review): ENV (unlike ARG) also persists this value into containers started
# from the image - confirm that is intended.
ENV DEBIAN_FRONTEND=noninteractive
# 0. Install CA certificates. This must happen before the apt sources are replaced,
#    otherwise the new sources cannot be reached.
RUN set -e; \
    apt-get update; \
    apt-get install -y ca-certificates; \
    update-ca-certificates; \
    rm -rf /var/lib/apt/lists/*
# Switch apt to the Tsinghua (TUNA) mirror; replace it with another mirror if needed.
# archive/security.ubuntu.com are the hosts used by x86_64 images, ports.ubuntu.com
# is the host used by aarch64 images (served by the mirror under /ubuntu-ports/), so
# all three are rewritten to make the mirror effective on both supported architectures.
# The dots are escaped so that only the literal host names match.
RUN sed -i \
        -e 's/archive\.ubuntu\.com/mirrors.tuna.tsinghua.edu.cn/g' \
        -e 's/security\.ubuntu\.com/mirrors.tuna.tsinghua.edu.cn/g' \
        -e 's/ports\.ubuntu\.com/mirrors.tuna.tsinghua.edu.cn/g' \
        /etc/apt/sources.list
# Install only the dependencies you actually need: remove or comment out the ones
# you do not. Also make sure the names of any dependency packages you download match
# the names used below, otherwise installation may fail with "file not found" errors.
# 1. Install the build environment
# Note: openssh-server is required by the two-node training sample; it can be dropped
#       when only single-node training is used.
# The package index (/var/lib/apt/lists) is removed together with the apt cache in
# this same layer so that neither is stored in the image.
RUN apt-get update && \
    apt-get -y install \
        autoconf \
        automake \
        bzip2 \
        dos2unix \
        e2fsprogs \
        g++ \
        git \
        lcov \
        libbz2-dev \
        libffi-dev \
        libgdbm-dev \
        libhdf5-dev \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libopenmpi-dev \
        libpcap-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libtool \
        m4 \
        make \
        net-tools \
        ninja-build \
        openssh-client \
        openssh-server \
        patch \
        pciutils \
        perl \
        tk-dev \
        unzip \
        vim \
        wget \
        zlib1g-dev \
    && apt-get clean && \
    rm -rf /var/cache/apt/ /var/lib/apt/lists/*
# 2. Build and install GCC 11.2.0 from source
# Everything runs in one RUN instruction: the tarball is downloaded into the working
# directory (/tmp), built, installed under /usr/local/gcc11.2.0, and the source/build
# tree is deleted again before the layer is committed.
RUN wget https://mirrors.huaweicloud.com/gnu/gcc/gcc-11.2.0/gcc-11.2.0.tar.gz && \
tar -zxvf gcc-11.2.0.tar.gz && \
cd gcc-11.2.0 && \
# Fetch the gmp/mpfr/mpc/isl tarballs into the source tree before running
# download_prerequisites (presumably so the script uses these local copies instead of
# downloading them itself - verify against the script).
wget https://mirrors.huaweicloud.com/gnu/gmp/gmp-6.1.0.tar.bz2 && \
wget https://mirrors.huaweicloud.com/gnu/mpfr/mpfr-3.1.6.tar.bz2 && \
wget https://mirrors.huaweicloud.com/gnu/mpc/mpc-1.0.3.tar.gz && \
wget https://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 && \
# Patch line 246 of download_prerequisites so its tar extraction passes --no-same-owner.
# The edit is tied to that exact line number of the 11.2.0 script.
sed -i "246s/tar -xf \"\${ar}\"/tar --no-same-owner -xf \"\${ar}\"/" contrib/download_prerequisites && \
./contrib/download_prerequisites && \
./configure --enable-languages=c,c++ --disable-multilib --with-system-zlib --prefix=/usr/local/gcc11.2.0 && \
make -j $(nproc) && make install && \
# Copy the freshly built libstdc++.so.6.0.29 out of the build tree into /lib64/
# before the tree is removed.
find /tmp/gcc-11.2.0/ -name libstdc++.so.6.0.29 -exec cp {} /lib64/ \; && \
rm -rf /tmp/gcc-11.2.0* && \
# Register the new gcc/g++/gcov with update-alternatives (priority 100) and select
# them explicitly as /usr/bin/gcc, /usr/bin/g++ and /usr/bin/gcov.
update-alternatives --install /usr/bin/gcc gcc /usr/local/gcc11.2.0/bin/gcc 100 && \
update-alternatives --install /usr/bin/g++ g++ /usr/local/gcc11.2.0/bin/g++ 100 && \
update-alternatives --install /usr/bin/gcov gcov /usr/local/gcc11.2.0/bin/gcov 100 && \
update-alternatives --set gcc /usr/local/gcc11.2.0/bin/gcc && \
update-alternatives --set g++ /usr/local/gcc11.2.0/bin/g++ && \
update-alternatives --set gcov /usr/local/gcc11.2.0/bin/gcov
# 3. Build and install CMake 3.22.6 from source
# The archive is fetched into /tmp (the working directory), built with the compiler
# reported by `gcc -v`, and the archive plus source tree are removed in the same layer.
RUN cmake_src=cmake-3.22.6 && \
    wget "https://cmake.org/files/v3.22/${cmake_src}.tar.gz" && \
    . /etc/profile && \
    gcc -v && \
    tar -zxf "/tmp/${cmake_src}.tar.gz" && \
    cd "${cmake_src}" && \
    ./bootstrap && \
    make -j "$(nproc)" && \
    make install && \
    rm -rf /tmp/${cmake_src}*
# 4. Build and install Python 3.11.0 from source
# --enable-shared places libpython3.11.so under the custom prefix
# /usr/local/python3.11.0/lib, which the dynamic loader does not search by default.
# The directory is therefore registered in /etc/ld.so.conf.d before ldconfig runs, so
# the interpreter also starts in sessions that do not inherit the image's
# LD_LIBRARY_PATH (for example logins through the openssh-server installed above).
RUN wget https://mirrors.huaweicloud.com/python/3.11.0/Python-3.11.0.tar.xz && \
    . /etc/profile && gcc -v && tar -xvf /tmp/Python-3.11.0.tar.xz && \
    cd Python-3.11.0 && \
    mkdir -p build && cd build && \
    ../configure --enable-shared --prefix=/usr/local/python3.11.0 && \
    make -j $(nproc) && make install && \
    # Remove the archive and source tree in the same layer that created them.
    rm -rf /tmp/Python-3.11.0* && \
    echo /usr/local/python3.11.0/lib > /etc/ld.so.conf.d/python3.11.0.conf && \
    ldconfig
# Expose the self-built Python on PATH and its shared library to the dynamic loader.
# LD_LIBRARY_PATH may be unset in the base image; ${VAR:+...} only emits the
# "<old value>:" prefix when there is an old value, so the result never starts with
# an empty entry (an empty entry makes the loader search the current directory).
ENV PATH=$PATH:/usr/local/python3.11.0/bin \
    LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/python3.11.0/lib
# Point /usr/bin/python3, /usr/bin/python, /usr/bin/pip3 and /usr/bin/pip at the
# self-built Python 3.11.0, replacing any existing entries.
RUN py_bin=/usr/local/python3.11.0/bin && \
    ln -sf -t /usr/bin "${py_bin}/python3" "${py_bin}/pip3" && \
    ln -sf "${py_bin}/python3" /usr/bin/python && \
    ln -sf "${py_bin}/pip3" /usr/bin/pip
# 5. Add the Ascend driver library directories to the loader search path
# ASCEND_BASE is a build argument (default /usr/local/Ascend); the three driver
# directories are placed in front of the existing LD_LIBRARY_PATH.
ARG ASCEND_BASE=/usr/local/Ascend
ENV LD_LIBRARY_PATH=${ASCEND_BASE}/driver/lib64:${ASCEND_BASE}/driver/lib64/common:${ASCEND_BASE}/driver/lib64/driver:${LD_LIBRARY_PATH}
# 6. Download and install the CANN 9.0.0 packages (architecture detected automatically)
# Download, installation and installer removal are done in a single RUN so the .run
# installers are never stored in an image layer. `set -eu` aborts the build on the
# first failed download or installation, and an architecture other than
# x86_64/aarch64 is rejected up front instead of failing later on missing files.
RUN set -eu; \
    arch="$(uname -m)"; \
    case "${arch}" in \
        x86_64|aarch64) ;; \
        *) echo "Unsupported architecture for CANN 9.0.0 packages: ${arch}" >&2; exit 1 ;; \
    esac; \
    cann_url="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%209.0.0"; \
    # Fetch the toolkit and the three chip-specific ops packages into the working directory.
    for pkg in toolkit 910b-ops A3-ops 950-ops; do \
        wget "${cann_url}/Ascend-cann-${pkg}_9.0.0_linux-${arch}.run"; \
    done; \
    chmod +x Ascend-cann-*.run; \
    # Install the same toolkit once per chip-specific prefix (A2, A3, A5).
    for chip in A2 A3 A5; do \
        mkdir -p "/usr/local/Ascend/cann-${chip}"; \
        bash "Ascend-cann-toolkit_9.0.0_linux-${arch}.run" --quiet --install --install-path="/usr/local/Ascend/cann-${chip}"; \
    done; \
    # Install each ops package into its matching prefix: 910b -> A2, A3 -> A3, 950 -> A5.
    bash "Ascend-cann-910b-ops_9.0.0_linux-${arch}.run" --quiet --install --install-path=/usr/local/Ascend/cann-A2; \
    bash "Ascend-cann-A3-ops_9.0.0_linux-${arch}.run" --quiet --install --install-path=/usr/local/Ascend/cann-A3; \
    bash "Ascend-cann-950-ops_9.0.0_linux-${arch}.run" --quiet --install --install-path=/usr/local/Ascend/cann-A5; \
    # Drop the installers in the same layer that downloaded them.
    rm -f Ascend-cann-*.run
# Make A5 the default CANN environment: /usr/local/Ascend/latest and
# /usr/local/Ascend/ascend-toolkit are symlinks into the cann-A5 prefix.
RUN ascend_root=/usr/local/Ascend && \
    ln -sf "${ascend_root}/cann-A5" "${ascend_root}/latest" && \
    ln -sf "${ascend_root}/cann-A5/ascend-toolkit" "${ascend_root}/ascend-toolkit"
# 7. Create the CANN environment switch script
# Generates /usr/local/set_cann_env.sh, intended to be sourced as
#   source /usr/local/set_cann_env.sh [a2|a3|a5]
# It exports ASCEND_TOOLKIT_HOME / ASCEND_OPP_PATH for the selected prefix, re-points
# the /usr/local/Ascend/latest and /usr/local/Ascend/ascend-toolkit symlinks, and
# sources the toolkit's set_env.sh.
# Hardening in the generated script:
#   - `return 1 2>/dev/null || exit 1` stops on a bad argument whether the script is
#     sourced or executed (a bare `return` fails when executed and the script would
#     carry on to rm/ln with an empty ASCEND_TOOLKIT_HOME);
#   - `rm -f` does not complain when a link is already absent;
#   - `ln -sfn` replaces an existing link instead of creating a link inside its target;
#   - expansions are quoted.
# The file is written with a single printf (one argument per script line).
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Usage: source /usr/local/set_cann_env.sh [a2|a3|a5]' \
        'case "$1" in' \
        '    a2|A2)' \
        '        export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A2' \
        '        ;;' \
        '    a3|A3)' \
        '        export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A3' \
        '        ;;' \
        '    a5|A5)' \
        '        export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A5' \
        '        ;;' \
        '    *)' \
        '        echo "Usage: source /usr/local/set_cann_env.sh [a2|a3|a5]"' \
        '        return 1 2>/dev/null || exit 1' \
        '        ;;' \
        'esac' \
        'export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        'rm -f /usr/local/Ascend/latest /usr/local/Ascend/ascend-toolkit' \
        'ln -sfn "$ASCEND_TOOLKIT_HOME" /usr/local/Ascend/latest' \
        'ln -sfn "$ASCEND_TOOLKIT_HOME/ascend-toolkit" /usr/local/Ascend/ascend-toolkit' \
        'source "$ASCEND_TOOLKIT_HOME/ascend-toolkit/set_env.sh"' \
        > /usr/local/set_cann_env.sh && \
    chmod +x /usr/local/set_cann_env.sh
# 8. Configure the pip package indexes
# Writes ~/.pip/pip.conf in one go: Aliyun as the primary index, the PyTorch CPU
# wheel index as an extra index, the listed hosts as trusted, and a 200 s timeout.
RUN mkdir -p ~/.pip && \
    printf '%s\n' \
        '[global]' \
        'trusted-host=mirrors.aliyun.com download.pytorch.org download-r2.pytorch.org' \
        'index-url=https://mirrors.aliyun.com/pypi/simple/' \
        'extra-index-url=https://download.pytorch.org/whl/cpu' \
        'timeout=200' \
        > ~/.pip/pip.conf
# 9. Install wheel and setuptools for the system interpreter (used when building torchrec)
# --no-cache-dir keeps pip's download cache out of this layer; deleting the cache in
# a later RUN would not shrink the image.
RUN pip3 install --no-cache-dir wheel setuptools
# 10. Stage the external RecSDK packages
# The packages must be supplied by the user under packages/ in the build context and
# must match the target framework; no version matching is performed here.
COPY packages/torch_rec_v1*.tar.gz \
     packages/torch_rec_v2*.tar.gz \
     packages/fbgemm_ascend*.whl \
     packages/rec_sdk_ops*.whl \
     /tmp/
# 11. Create the torch_v1_pt2.6.0 virtual environment
# Installs pinned pip, CPU builds of torch 2.6.0 and fbgemm_gpu 1.1.0 from the
# PyTorch wheel index, torch-npu 2.6.0, and the staged torch_rec v1 source package;
# the pip cache is removed in the same layer.
# NOTE(review): unlike the 2.7.1 environments, the fbgemm_ascend and rec_sdk_ops
# wheels are not installed here - confirm this is intentional.
RUN venv=/opt/buildtools/torch_v1_pt2.6.0 && \
    python3 -m venv "${venv}" && \
    "${venv}/bin/pip" install pip==25.0.1 && \
    "${venv}/bin/pip" install wheel setuptools && \
    "${venv}/bin/pip" install torch==2.6.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install torch-npu==2.6.0 && \
    "${venv}/bin/pip" install fbgemm_gpu==1.1.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install /tmp/torch_rec_v1*.tar.gz && \
    rm -rf /root/.cache/pip
# 12. Create the torch_v1_pt2.7.1 virtual environment
# Installs pinned pip, CPU builds of torch 2.7.1 and fbgemm_gpu 1.2.0 from the
# PyTorch wheel index, torch-npu 2.7.1, the staged torch_rec v1 source package and
# the staged fbgemm_ascend / rec_sdk_ops wheels; the pip cache is removed in the
# same layer.
RUN venv=/opt/buildtools/torch_v1_pt2.7.1 && \
    python3 -m venv "${venv}" && \
    "${venv}/bin/pip" install pip==25.0.1 && \
    "${venv}/bin/pip" install wheel setuptools && \
    "${venv}/bin/pip" install torch==2.7.1+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install torch-npu==2.7.1 && \
    "${venv}/bin/pip" install fbgemm_gpu==1.2.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install /tmp/torch_rec_v1*.tar.gz && \
    "${venv}/bin/pip" install /tmp/fbgemm_ascend*.whl && \
    "${venv}/bin/pip" install /tmp/rec_sdk_ops*.whl && \
    rm -rf /root/.cache/pip
# 13. Create the torch_v2_pt2.7.1 virtual environment
# Same package set as torch_v1_pt2.7.1, except that the staged torch_rec v2 source
# package is installed instead of v1; the pip cache is removed in the same layer.
RUN venv=/opt/buildtools/torch_v2_pt2.7.1 && \
    python3 -m venv "${venv}" && \
    "${venv}/bin/pip" install pip==25.0.1 && \
    "${venv}/bin/pip" install wheel setuptools && \
    "${venv}/bin/pip" install torch==2.7.1+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install torch-npu==2.7.1 && \
    "${venv}/bin/pip" install fbgemm_gpu==1.2.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
    "${venv}/bin/pip" install /tmp/torch_rec_v2*.tar.gz && \
    "${venv}/bin/pip" install /tmp/fbgemm_ascend*.whl && \
    "${venv}/bin/pip" install /tmp/rec_sdk_ops*.whl && \
    rm -rf /root/.cache/pip
# 14. Clean up the temporary directory
# Removes every non-hidden entry left in /tmp (the working directory). Files added
# by earlier instructions still exist in those earlier layers, so this tidies the
# final filesystem rather than reducing image size.
RUN rm -rf /tmp/*