# RecSDK TensorFlow Runtime and Build Image
# Choose the base image below according to your actual environment.
FROM ubuntu:20.04
WORKDIR /tmp
# Keep apt/debconf non-interactive while the image is being built.
# ARG (unlike ENV) is visible to every later RUN in this stage but is not
# stored in the final image's environment, so containers started from the
# image keep the normal interactive default.
ARG DEBIAN_FRONTEND=noninteractive
# 0. Install CA certificates. This must happen before the apt sources are
#    replaced, otherwise the new mirror cannot be reached.
RUN apt-get update \
    && apt-get -y install ca-certificates \
    && update-ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Point apt at the Tsinghua (TUNA) mirror; substitute another mirror if needed.
# Both the archive and the security hosts are rewritten in a single sed pass.
RUN sed -i \
        -e 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
        -e 's/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
        /etc/apt/sources.list
# Install only the dependencies you actually need: remove or comment out the
# code for anything that is not required. Also make sure the names of the
# downloaded packages match the names used in the code below, otherwise the
# corresponding install step may fail with a "file not found" error.
# 1. Install the build environment (packages listed alphabetically)
RUN apt-get update \
    && apt-get install -y \
        autoconf \
        automake \
        bzip2 \
        dos2unix \
        g++ \
        git \
        lcov \
        libbz2-dev \
        libffi-dev \
        libgdbm-dev \
        libhdf5-dev \
        libncurses5-dev \
        libpcap-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libtool \
        make \
        net-tools \
        openssh-client \
        patch \
        pciutils \
        sudo \
        tk-dev \
        unzip \
        vim \
        wget \
        xz-utils \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# 2. Download the source archives into the working directory (/tmp).
# NOTE(review): the UCX archive is taken from the moving "master" branch, so
# its content can differ between builds - consider pinning a release tag.
RUN wget https://mirrors.huaweicloud.com/gnu/gcc/gcc-11.2.0/gcc-11.2.0.tar.gz \
    && wget https://cmake.org/files/v3.22/cmake-3.22.6.tar.gz \
    && wget https://github.com/h5py/h5py/archive/refs/tags/3.1.0.zip -O h5py-3.1.0.zip \
    && wget https://github.com/openucx/ucx/archive/master.zip \
    && wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz \
    && wget https://mirrors.huaweicloud.com/python/3.7.5/Python-3.7.5.tar.xz \
    && wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.5/src/hdf5-1.10.5.tar.gz
# 3. Build and install gcc 11.2.0 into /usr/local/gcc11.2.0.
#    - gmp/mpfr/mpc/isl are fetched into the source tree before
#      contrib/download_prerequisites runs (presumably so the script finds
#      them already present - verify against the script).
#    - The sed patches line 246 of contrib/download_prerequisites so that its
#      tar call extracts with --no-same-owner.
#    - After installation, every libstdc++.so.6.0.29 found in the build tree is
#      copied into the multiarch library directory (x86_64 directory on x86_64,
#      the aarch64 directory on anything else).
RUN . /etc/profile \
    && tar -zxvf gcc-11.2.0.tar.gz \
    && ( \
        cd gcc-11.2.0 \
        && wget https://mirrors.huaweicloud.com/gnu/gmp/gmp-6.1.0.tar.bz2 \
        && wget https://mirrors.huaweicloud.com/gnu/mpfr/mpfr-3.1.6.tar.bz2 \
        && wget https://mirrors.huaweicloud.com/gnu/mpc/mpc-1.0.3.tar.gz \
        && wget https://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 \
        && sed -i '246s/tar -xf "${ar}"/tar --no-same-owner -xf "${ar}"/' contrib/download_prerequisites \
        && ./contrib/download_prerequisites \
        && ./configure --enable-languages=c,c++ --disable-multilib --with-system-zlib --prefix=/usr/local/gcc11.2.0 \
        && make -j "$(nproc)" \
        && make install \
    ) \
    && case "$(uname -m)" in \
        x86_64) multiarch_dir=/usr/lib/x86_64-linux-gnu ;; \
        *)      multiarch_dir=/usr/lib/aarch64-linux-gnu ;; \
    esac \
    && find gcc-11.2.0/ -name libstdc++.so.6.0.29 -exec cp {} "${multiarch_dir}/" \; \
    && rm -rf gcc-11.2.0*
# Put gcc 11.2.0 first on the loader and command search paths.
# Nothing earlier in this file defines LD_LIBRARY_PATH, so a plain
# "...:$LD_LIBRARY_PATH" would end in ':' - an empty entry, which the dynamic
# loader interprets as the current working directory. The ${VAR:+...} form
# appends ":<old value>" only when a previous value actually exists.
ENV LD_LIBRARY_PATH=/usr/local/gcc11.2.0/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    PATH=/usr/local/gcc11.2.0/bin:$PATH
# 4. Build and install cmake 3.22.6 (gcc -v prints the compiler in use to the build log)
RUN . /etc/profile \
    && gcc -v \
    && tar -zxf cmake-3.22.6.tar.gz \
    && ( \
        cd cmake-3.22.6 \
        && ./bootstrap \
        && make -j "$(nproc)" \
        && make install \
    ) \
    && rm -rf cmake-3.22.6*
# 5. Build and install UCX into /usr/local/ucx from the downloaded master.zip
RUN . /etc/profile \
    && gcc -v \
    && unzip master.zip \
    && ( \
        cd ucx-master \
        && ./autogen.sh \
        && ./contrib/configure-release --prefix=/usr/local/ucx \
        && make \
        && make install \
    ) \
    && rm -rf ucx-master*
# 6. Build and install Open MPI 4.1.5 into /usr/local/openmpi, configured
#    against the UCX installation in /usr/local/ucx
RUN . /etc/profile \
    && gcc -v \
    && tar -zxvf openmpi-4.1.5.tar.gz \
    && ( \
        cd openmpi-4.1.5 \
        && ./configure --enable-orterun-prefix-by-default --prefix=/usr/local/openmpi --with-ucx=/usr/local/ucx CFLAGS="-fPIC" CXXFLAGS="-fPIC" \
        && make -j "$(nproc)" \
        && make install \
    ) \
    && rm -rf openmpi-4.1.5*
# 6.1 Register the Open MPI library directory with the dynamic linker cache
#     and put its binaries and libraries first on the search paths
RUN printf '%s\n' /usr/local/openmpi/lib > /etc/ld.so.conf.d/openmpi.conf \
    && ldconfig
ENV PATH=/usr/local/openmpi/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
# 7. Build and install OpenSSL 1.1.1w into /usr/local/openssl_1.1.1.
# The tarball is downloaded, built and deleted inside one RUN: a file removed
# by a later instruction would still be stored in the layer that created it,
# so fetching it in a separate RUN keeps the archive in the image.
RUN . /etc/profile && \
    gcc -v && \
    wget https://www.openssl.org/source/openssl-1.1.1w.tar.gz && \
    tar -zxvf openssl-1.1.1w.tar.gz && \
    cd openssl-1.1.1w && \
    ./config --prefix=/usr/local/openssl_1.1.1 --libdir=lib --openssldir=/etc/pki/tls shared zlib && \
    make -j $(nproc) && \
    make install && \
    cd .. && \
    rm -rf openssl-1.1.1w*
# Register the OpenSSL 1.1.1 library directory with the dynamic linker cache
# and put its binaries and libraries first on the search paths
RUN printf '%s\n' /usr/local/openssl_1.1.1/lib > /etc/ld.so.conf.d/openssl-1.1.1.conf \
    && ldconfig
ENV LD_LIBRARY_PATH=/usr/local/openssl_1.1.1/lib:$LD_LIBRARY_PATH \
    PATH=/usr/local/openssl_1.1.1/bin:$PATH
# 8. Build and install Python 3.7.5 (shared library build) with SSL support
#    taken from /usr/local/openssl_1.1.1. The build runs out-of-tree in
#    Python-3.7.5/build.
RUN tar -xvf Python-3.7.5.tar.xz \
    && mkdir -p Python-3.7.5/build \
    && ( \
        cd Python-3.7.5/build \
        && ../configure \
            --prefix=/usr/local/python3.7.5 \
            --config-cache \
            --disable-option-checking \
            --enable-shared \
            --with-openssl=/usr/local/openssl_1.1.1 \
            --with-openssl-rpath=auto \
        && make -j "$(nproc)" \
        && make install \
    ) \
    && rm -rf Python-3.7.5* \
    && ldconfig
# The Python directories are appended (not prepended), so earlier entries on
# both search paths keep priority.
ENV PATH=$PATH:/usr/local/python3.7.5/bin
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/python3.7.5/lib
# Make the self-built interpreter the default "python3" and "python".
# An existing /usr/bin/python3 is kept as /usr/bin/python3.bak; the links are
# created regardless of whether that rename happened or succeeded.
RUN if [ -f /usr/bin/python3 ]; then mv /usr/bin/python3 /usr/bin/python3.bak; fi; \
    ln -sf /usr/local/python3.7.5/bin/python3 /usr/bin/python3 \
    && ln -sf /usr/local/python3.7.5/bin/python3 /usr/bin/python
# Configure the pip package index (Aliyun mirror).
# The index is addressed over HTTPS so packages installed as root during the
# build are fetched over an authenticated channel. "trusted-host" is no longer
# written: it is only needed for plain-HTTP indexes and would switch TLS
# verification off for this host.
RUN mkdir -p ~/.pip && \
    printf '%s\n' \
        '[global]' \
        'index-url=https://mirrors.aliyun.com/pypi/simple/' \
        'timeout=200' \
        > ~/.pip/pip.conf
# 9. Install HDF5 and h5py.
#    First step: build HDF5 1.10.5 (shared, with the high-level library) into /usr/local/hdf5
RUN tar -zxvf hdf5-1.10.5.tar.gz \
    && ( \
        cd hdf5-1.10.5 \
        && ./configure --prefix=/usr/local/hdf5 --enable-shared --enable-hl \
        && make -j "$(nproc)" \
        && make install \
    ) \
    && rm -rf hdf5-1.10.5*
# Register the HDF5 library directory with the dynamic linker cache and export
# the HDF5 location plus the compiler/loader search paths for later steps
RUN printf '%s\n' /usr/local/hdf5/lib > /etc/ld.so.conf.d/hdf5.conf \
    && ldconfig
ENV HDF5_DIR=/usr/local/hdf5 \
    PATH=/usr/local/hdf5/bin:$PATH \
    CPATH=/usr/local/hdf5/include/:/usr/local/hdf5/lib/ \
    LD_LIBRARY_PATH=/usr/local/hdf5/lib/:$LD_LIBRARY_PATH
# Expose libhdf5.so and libhdf5_hl.so in the multiarch library directory:
# the x86_64 directory on x86_64, the aarch64 directory on anything else.
RUN case "$(uname -m)" in \
        x86_64) multiarch_dir=/usr/lib/x86_64-linux-gnu ;; \
        *)      multiarch_dir=/usr/lib/aarch64-linux-gnu ;; \
    esac \
    && mkdir -p "${multiarch_dir}" \
    && ln -s /usr/local/hdf5/lib/libhdf5.so "${multiarch_dir}/libhdf5.so" \
    && ln -s /usr/local/hdf5/lib/libhdf5_hl.so "${multiarch_dir}/libhdf5_hl.so"
# Build h5py 3.1.0 against /usr/local/hdf5.
# A binary wheel is produced with bdist_wheel (instead of installing directly
# from source), stored in /tmp/wheels and then installed into the system Python.
RUN mkdir -p /tmp/wheels \
    && pip3.7 install -U pip \
    && pip3.7 install "Cython<3" wheel \
    && unzip h5py-3.1.0.zip \
    && ( \
        cd h5py-3.1.0 \
        && HDF5_DIR=/usr/local/hdf5 python3.7 setup.py bdist_wheel -v \
        && cp dist/*.whl /tmp/wheels/ \
        && pip3.7 install /tmp/wheels/h5py-3.1.0-*.whl -v \
    ) \
    && rm -rf h5py-3.1.0
# 10. Put the Ascend driver library directories first on the loader search path.
#     ASCEND_BASE is a build argument, so the root can be overridden with --build-arg.
ARG ASCEND_BASE=/usr/local/Ascend
ENV LD_LIBRARY_PATH=${ASCEND_BASE}/driver/lib64:${ASCEND_BASE}/driver/lib64/common:${ASCEND_BASE}/driver/lib64/driver:${LD_LIBRARY_PATH}
# 11. Download and install the CANN 9.0.0 packages for the build architecture.
#
# Layout produced (same as before):
#   /usr/local/Ascend/cann-A2 : toolkit + 910b ops
#   /usr/local/Ascend/cann-A3 : toolkit + A3 ops
#   /usr/local/Ascend/cann-A5 : toolkit + 950 ops
#
# Each installer is downloaded, executed and deleted inside the same RUN, so
# the .run archives are never stored in an image layer. "set -e" aborts the
# build on the first failed download or installation, and an architecture
# other than x86_64/aarch64 is rejected explicitly instead of being skipped.

# Toolkit: one download, installed into the A2, A3 and A5 prefixes in that order.
RUN set -e; \
    arch="$(uname -m)"; \
    case "${arch}" in \
        x86_64|aarch64) ;; \
        *) echo "Unsupported architecture for CANN packages: ${arch}" >&2; exit 1 ;; \
    esac; \
    repo="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%209.0.0"; \
    toolkit="Ascend-cann-toolkit_9.0.0_linux-${arch}.run"; \
    wget "${repo}/${toolkit}"; \
    chmod +x "${toolkit}"; \
    for variant in A2 A3 A5; do \
        mkdir -p "/usr/local/Ascend/cann-${variant}"; \
        bash "${toolkit}" --quiet --install --install-path="/usr/local/Ascend/cann-${variant}"; \
    done; \
    rm -f "${toolkit}"

# Ops packages: each "<package>:<prefix>" pair installs one ops package into
# the matching toolkit prefix (910b -> A2, A3 -> A3, 950 -> A5).
RUN set -e; \
    arch="$(uname -m)"; \
    repo="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%209.0.0"; \
    for pair in 910b:A2 A3:A3 950:A5; do \
        ops="Ascend-cann-${pair%%:*}-ops_9.0.0_linux-${arch}.run"; \
        dest="/usr/local/Ascend/cann-${pair##*:}"; \
        wget "${repo}/${ops}"; \
        chmod +x "${ops}"; \
        bash "${ops}" --quiet --install --install-path="${dest}"; \
        rm -f "${ops}"; \
    done
# Make A2 the default CANN environment by pointing the "latest" and
# "ascend-toolkit" links at the cann-A2 installation
RUN ascend_root=/usr/local/Ascend \
    && ln -sf "${ascend_root}/cann-A2" "${ascend_root}/latest" \
    && ln -sf "${ascend_root}/cann-A2/ascend-toolkit" "${ascend_root}/ascend-toolkit"
# Generate /usr/local/set_cann_env.sh, the CANN environment switcher.
# Usage: source /usr/local/set_cann_env.sh [a2|a3|a5]
# The script exports ASCEND_TOOLKIT_HOME / ASCEND_OPP_PATH for the selected
# variant, re-points the "latest" and "ascend-toolkit" links and sources the
# toolkit's set_env.sh.
#
# Fix: on an unknown argument the script used a bare "return 1". "return" only
# works when the file is sourced; when the (executable) script is run
# directly, "return" fails and execution continues into the rm/ln lines with
# ASCEND_TOOLKIT_HOME unset, removing the default links.
# "return 1 2>/dev/null || exit 1" stops in both cases.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'case $1 in' \
        ' a2|A2)' \
        ' export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A2' \
        ' export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        ' ;;' \
        ' a3|A3)' \
        ' export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A3' \
        ' export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        ' ;;' \
        ' a5|A5)' \
        ' export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A5' \
        ' export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        ' ;;' \
        ' *)' \
        ' echo "Usage: source /usr/local/set_cann_env.sh [a2|a3|a5]"' \
        ' return 1 2>/dev/null || exit 1' \
        ' ;;' \
        'esac' \
        'rm /usr/local/Ascend/latest' \
        'rm /usr/local/Ascend/ascend-toolkit' \
        'ln -sf $ASCEND_TOOLKIT_HOME /usr/local/Ascend/latest' \
        'ln -sf $ASCEND_TOOLKIT_HOME/ascend-toolkit /usr/local/Ascend/ascend-toolkit' \
        'source $ASCEND_TOOLKIT_HOME/ascend-toolkit/set_env.sh' \
        > /usr/local/set_cann_env.sh && \
    chmod +x /usr/local/set_cann_env.sh
# 12. Download the TensorFlow (1.15.0 and 2.6.5) and NPU plugin wheels for the
#     build architecture into the working directory.
# "set -e" makes any failed download abort the build (the commands were
# previously separated by ';', so only the last command's status counted), and
# an architecture other than x86_64/aarch64 is rejected instead of silently
# downloading nothing.
# The TensorFlow wheels use a different platform tag per architecture
# (manylinux2010 on x86_64, manylinux2014 on aarch64); the NPU wheels always
# use manylinux2014.
RUN set -e; \
    tf_repo="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindX/OpenSource/python/packages"; \
    npu_repo="https://gitee.com/ascend/tensorflow/releases/download/tfa_v0.0.47_8.5.0"; \
    arch="$(uname -m)"; \
    case "${arch}" in \
        x86_64)  tf_platform=manylinux2010_x86_64;  other_arch=aarch64 ;; \
        aarch64) tf_platform=manylinux2014_aarch64; other_arch=x86_64 ;; \
        *) echo "Unsupported architecture for TensorFlow wheels: ${arch}" >&2; exit 1 ;; \
    esac; \
    wget "${tf_repo}/tensorflow-1.15.0-cp37-cp37m-${tf_platform}.whl"; \
    wget "${tf_repo}/tensorflow-2.6.5-cp37-cp37m-${tf_platform}.whl"; \
    wget "${npu_repo}/npu_bridge-1.15.0-py3-none-manylinux2014_${arch}.whl"; \
    wget "${npu_repo}/npu_device-2.6.5-py3-none-manylinux2014_${arch}.whl"; \
    # Drop any wheel of the other architecture that may be lying in the directory
    rm -f ./*_"${other_arch}".whl
# 13. Provide the external RecSDK package. The user must supply the package
#     that matches the framework under packages/ in the build context; no
#     matching is done here.
COPY ["packages/tf_rec_v1_*.tar.gz", "/tmp/"]
# Core type (a2/a3/a5) passed to the operator installation; defaults to a2.
# The package can detect it automatically, and users may also override it.
ARG CORE_TYPE=a2
# 14. Install the TensorFlow-related Python packages and the Rec SDK.
# Two virtual environments are created: tf1 (TensorFlow 1.15.0) and
# tf2 (TensorFlow 2.6.5).
#
# CC/CXX point at the Open MPI compiler wrappers, which are needed while
# mpi4py is installed. They are exported inside each RUN rather than declared
# with ENV: an ENV value is stored in the image, and a later
# "RUN unset CC && unset CXX" only changes the shell of that single RUN, so it
# never actually removed them. With the export they exist only while these two
# steps run.
#
# Inside the architecture branches the installs are chained with '&&' so that
# a failed tensorflow install stops the build instead of being masked by the
# following command.

# tf1 virtual environment
RUN export CC=/usr/local/openmpi/bin/mpicc CXX=/usr/local/openmpi/bin/mpicxx && \
    python3.7 -m venv /opt/buildtools/tf1_env && \
    /opt/buildtools/tf1_env/bin/pip install -U pip && \
    /opt/buildtools/tf1_env/bin/pip install /tmp/wheels/h5py-3.1.0-*.whl && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        /opt/buildtools/tf1_env/bin/pip install tensorflow-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl && \
        /opt/buildtools/tf1_env/bin/pip install npu_bridge-1.15.0-py3-none-manylinux2014_x86_64.whl; \
    else \
        /opt/buildtools/tf1_env/bin/pip install tensorflow-1.15.0-cp37-cp37m-manylinux2014_aarch64.whl && \
        /opt/buildtools/tf1_env/bin/pip install npu_bridge-1.15.0-py3-none-manylinux2014_aarch64.whl; \
    fi && \
    /opt/buildtools/tf1_env/bin/pip install tf_slim mpi4py && \
    CORE_TYPE=${CORE_TYPE} /opt/buildtools/tf1_env/bin/pip install /tmp/tf_rec_v1*.tar.gz && \
    rm -rf /root/.cache/pip

# tf2 virtual environment
RUN export CC=/usr/local/openmpi/bin/mpicc CXX=/usr/local/openmpi/bin/mpicxx && \
    python3.7 -m venv /opt/buildtools/tf2_env && \
    /opt/buildtools/tf2_env/bin/pip install -U pip && \
    /opt/buildtools/tf2_env/bin/pip install /tmp/wheels/h5py-3.1.0-*.whl && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        /opt/buildtools/tf2_env/bin/pip install tensorflow-2.6.5-cp37-cp37m-manylinux2010_x86_64.whl && \
        /opt/buildtools/tf2_env/bin/pip install npu_device-2.6.5-py3-none-manylinux2014_x86_64.whl; \
    else \
        /opt/buildtools/tf2_env/bin/pip install tensorflow-2.6.5-cp37-cp37m-manylinux2014_aarch64.whl && \
        /opt/buildtools/tf2_env/bin/pip install npu_device-2.6.5-py3-none-manylinux2014_aarch64.whl; \
    fi && \
    /opt/buildtools/tf2_env/bin/pip install tf_slim mpi4py && \
    CORE_TYPE=${CORE_TYPE} /opt/buildtools/tf2_env/bin/pip install /tmp/tf_rec_v1*.tar.gz && \
    rm -rf /root/.cache/pip
# 15. Install the build-time dependencies into the system Python (outside the
#     virtual environments): both NPU plugin wheels, packaging and numpy.
# The two wheel installs in each architecture branch are chained with '&&' so
# that a failed npu_bridge install stops the build; with ';' only the status
# of the second install was checked.
RUN pip3.7 install -U pip && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        pip3.7 install npu_bridge-1.15.0-py3-none-manylinux2014_x86_64.whl --force-reinstall && \
        pip3.7 install npu_device-2.6.5-py3-none-manylinux2014_x86_64.whl --force-reinstall; \
    else \
        pip3.7 install npu_bridge-1.15.0-py3-none-manylinux2014_aarch64.whl --force-reinstall && \
        pip3.7 install npu_device-2.6.5-py3-none-manylinux2014_aarch64.whl --force-reinstall; \
    fi && \
    pip3.7 install packaging numpy && \
    rm -rf /root/.cache/pip
# 16. Empty the working directory (/tmp, set by WORKDIR at the top of the file).
# NOTE: this hides the files from the final filesystem only; anything created
# by an earlier instruction is still stored in that instruction's layer.
RUN rm -rf /tmp/*