# RecSDK TensorFlow Runtime and Build Image
# Please adjust the base image to match your environment.
FROM ubuntu:20.04

WORKDIR /tmp

# Keep apt/dpkg non-interactive during the build only. An ARG is exposed to
# RUN instructions as an environment variable but, unlike ENV, it is not
# persisted into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# 0. Install CA certificates. This must happen before the apt mirror is
#    replaced, otherwise the new mirror cannot be accessed.
#    --no-install-recommends keeps this layer limited to the requested package
#    and its hard dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    update-ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Switch apt to the Tsinghua (TUNA) mirror; users may substitute another mirror.
# Only the archive.ubuntu.com and security.ubuntu.com host names are rewritten,
# so a sources.list that uses other host names is left unchanged.
RUN sed -i \
        -e 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
        -e 's/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
        /etc/apt/sources.list

# Install only the dependencies you actually need; remove or comment out the
# ones you do not. Make sure the names of the downloaded packages match the
# names used below, otherwise later steps may fail with "file not found".

# 1. Install the build environment (packages listed alphabetically).
RUN apt-get update && \
    apt-get install -y \
        autoconf \
        automake \
        bzip2 \
        dos2unix \
        g++ \
        git \
        lcov \
        libbz2-dev \
        libffi-dev \
        libgdbm-dev \
        libhdf5-dev \
        libncurses5-dev \
        libpcap-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libtool \
        make \
        net-tools \
        openssh-client \
        patch \
        pciutils \
        sudo \
        tk-dev \
        unzip \
        vim \
        wget \
        xz-utils \
        zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# 2. Download the source archives into the working directory (/tmp).
# NOTE(review): the UCX archive is the tip of the master branch, so its content
# can change between builds — consider pinning a release tag.
RUN wget https://mirrors.huaweicloud.com/gnu/gcc/gcc-11.2.0/gcc-11.2.0.tar.gz && \
    wget https://cmake.org/files/v3.22/cmake-3.22.6.tar.gz && \
    wget https://github.com/h5py/h5py/archive/refs/tags/3.1.0.zip -O h5py-3.1.0.zip && \
    wget https://github.com/openucx/ucx/archive/master.zip && \
    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz && \
    wget https://mirrors.huaweicloud.com/python/3.7.5/Python-3.7.5.tar.xz && \
    wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.5/src/hdf5-1.10.5.tar.gz

# 3. Build and install gcc 11.2.0 (C and C++ only) under /usr/local/gcc11.2.0.
#    Steps performed by the RUN below, in order:
#      - unpack the gcc source tarball and fetch the gmp/mpfr/mpc/isl archives
#        into the source directory;
#      - rewrite line 246 of contrib/download_prerequisites so its tar call
#        passes --no-same-owner, then run that script;
#      - configure without multilib and with the system zlib, build and install;
#      - copy libstdc++.so.6.0.29 from the build tree into the multiarch library
#        directory (x86_64-linux-gnu on x86_64, aarch64-linux-gnu for any other
#        architecture);
#      - delete the source tree and tarball.
#    NOTE(review): the sed edit is keyed to a line number and silently does
#    nothing if that line of download_prerequisites does not match.
RUN . /etc/profile && \
    tar -zxvf gcc-11.2.0.tar.gz && \
    cd gcc-11.2.0 && \
    wget https://mirrors.huaweicloud.com/gnu/gmp/gmp-6.1.0.tar.bz2 && \
    wget https://mirrors.huaweicloud.com/gnu/mpfr/mpfr-3.1.6.tar.bz2 && \
    wget https://mirrors.huaweicloud.com/gnu/mpc/mpc-1.0.3.tar.gz && \
    wget https://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 && \
    sed -i '246s/tar -xf "${ar}"/tar --no-same-owner -xf "${ar}"/' contrib/download_prerequisites && \
    ./contrib/download_prerequisites && \
    ./configure --enable-languages=c,c++ --disable-multilib --with-system-zlib --prefix=/usr/local/gcc11.2.0 && \
    make -j $(nproc) && make install && cd .. && \
    ARCH=$(uname -m) && \
    if [ "$ARCH" = "x86_64" ]; then find gcc-11.2.0/ -name libstdc++.so.6.0.29 -exec cp {} /usr/lib/x86_64-linux-gnu/ \;; else find gcc-11.2.0/ -name libstdc++.so.6.0.29 -exec cp {} /usr/lib/aarch64-linux-gnu/ \;; fi && \
    rm -rf gcc-11.2.0*

# Put the new gcc's lib64 and bin directories ahead of the existing search paths.
ENV LD_LIBRARY_PATH=/usr/local/gcc11.2.0/lib64:$LD_LIBRARY_PATH \
    PATH=/usr/local/gcc11.2.0/bin:$PATH

# 4. Build and install cmake 3.22.6 from source, then remove the sources.
#    `gcc -v` prints the compiler that is picked up from PATH.
RUN . /etc/profile && \
    gcc -v && \
    tar -xzf cmake-3.22.6.tar.gz && \
    ( \
        cd cmake-3.22.6 && \
        ./bootstrap && \
        make -j "$(nproc)" && \
        make install \
    ) && \
    rm -rf cmake-3.22.6*

# 5. Build and install UCX under /usr/local/ucx from the downloaded master.zip.
#    The build runs in parallel (make -j) like every other source build in
#    this file. The final rm also removes master.zip via the ucx-master* glob.
RUN . /etc/profile && gcc -v && unzip master.zip && \
    cd ucx-master && \
    ./autogen.sh && \
    ./contrib/configure-release --prefix=/usr/local/ucx && \
    make -j "$(nproc)" && make install && cd .. && \
    rm -rf ucx-master*

# 6. Build and install Open MPI 4.1.5, configured against the UCX installation
#    in /usr/local/ucx.
RUN . /etc/profile && \
    gcc -v && \
    tar -xzvf openmpi-4.1.5.tar.gz && \
    ( \
        cd openmpi-4.1.5 && \
        ./configure \
            --enable-orterun-prefix-by-default \
            --prefix=/usr/local/openmpi \
            --with-ucx=/usr/local/ucx \
            CFLAGS="-fPIC" \
            CXXFLAGS="-fPIC" && \
        make -j "$(nproc)" && \
        make install \
    ) && \
    rm -rf openmpi-4.1.5*

# 6.1 Permanently add the Open MPI library directory to the dynamic linker cache.
RUN echo "/usr/local/openmpi/lib" > /etc/ld.so.conf.d/openmpi.conf && \
    ldconfig

ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:${LD_LIBRARY_PATH}
ENV PATH=/usr/local/openmpi/bin:${PATH}

# 7. Build and install OpenSSL 1.1.1w under /usr/local/openssl_1.1.1.
#    The tarball is downloaded, built and deleted inside one RUN so that the
#    source archive is not left behind in an intermediate image layer.
RUN . /etc/profile && \
    gcc -v && \
    wget https://www.openssl.org/source/openssl-1.1.1w.tar.gz && \
    tar -zxvf openssl-1.1.1w.tar.gz && cd openssl-1.1.1w && \
    ./config --prefix=/usr/local/openssl_1.1.1 --libdir=lib --openssldir=/etc/pki/tls shared zlib && \
    make -j "$(nproc)" && make install && cd .. && \
    rm -rf openssl-1.1.1w*

# Register the OpenSSL library directory with the dynamic linker cache.
RUN echo "/usr/local/openssl_1.1.1/lib" > /etc/ld.so.conf.d/openssl-1.1.1.conf && ldconfig

ENV PATH=/usr/local/openssl_1.1.1/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/openssl_1.1.1/lib:$LD_LIBRARY_PATH

# 8. Build and install Python 3.7.5 as a shared build, linked against the
#    OpenSSL installation in /usr/local/openssl_1.1.1. The build runs out of
#    tree in Python-3.7.5/build and the sources are removed afterwards.
RUN tar -xvf Python-3.7.5.tar.xz && \
    mkdir -p Python-3.7.5/build && \
    ( \
        cd Python-3.7.5/build && \
        ../configure \
            --prefix=/usr/local/python3.7.5 \
            --config-cache \
            --disable-option-checking \
            --enable-shared \
            --with-openssl=/usr/local/openssl_1.1.1 \
            --with-openssl-rpath=auto && \
        make -j "$(nproc)" && \
        make install \
    ) && \
    rm -rf Python-3.7.5* && \
    ldconfig

# Append (not prepend) the new interpreter's bin and lib directories.
ENV PATH=${PATH}:/usr/local/python3.7.5/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/python3.7.5/lib

# Back up an existing /usr/bin/python3 (best effort), then point both
# /usr/bin/python3 and /usr/bin/python at the freshly built interpreter.
RUN if [ -f /usr/bin/python3 ]; then \
        mv /usr/bin/python3 /usr/bin/python3.bak; \
    fi; \
    for link in /usr/bin/python3 /usr/bin/python; do \
        ln -sf /usr/local/python3.7.5/bin/python3 "${link}" || exit 1; \
    done

# Configure the pip package index (Aliyun mirror; change it as needed).
# The index is addressed over HTTPS so downloads are protected in transit;
# a trusted-host entry, which would disable certificate verification for the
# mirror, is therefore not written. The file is created in one step.
RUN mkdir -p ~/.pip && \
    printf '%s\n' \
        '[global]' \
        'index-url=https://mirrors.aliyun.com/pypi/simple/' \
        'timeout=200' \
        > ~/.pip/pip.conf

# 9. Install HDF5 and h5py.
#    First build HDF5 1.10.5 (shared, with the high-level library) under
#    /usr/local/hdf5 and remove the sources.
RUN tar -xzvf hdf5-1.10.5.tar.gz && \
    ( \
        cd hdf5-1.10.5 && \
        ./configure --prefix=/usr/local/hdf5 --enable-shared --enable-hl && \
        make -j "$(nproc)" && \
        make install \
    ) && \
    rm -rf hdf5-1.10.5*

# Register the HDF5 library directory with the dynamic linker cache.
RUN echo "/usr/local/hdf5/lib" > /etc/ld.so.conf.d/hdf5.conf && \
    ldconfig

ENV HDF5_DIR=/usr/local/hdf5
ENV PATH=/usr/local/hdf5/bin:${PATH}
ENV CPATH=/usr/local/hdf5/include/:/usr/local/hdf5/lib/
ENV LD_LIBRARY_PATH=/usr/local/hdf5/lib/:${LD_LIBRARY_PATH}

# Link libhdf5.so and libhdf5_hl.so into the multiarch library directory:
# x86_64-linux-gnu on x86_64, aarch64-linux-gnu for any other architecture.
RUN if [ "$(uname -m)" = "x86_64" ]; then \
        MULTIARCH_DIR=/usr/lib/x86_64-linux-gnu; \
    else \
        MULTIARCH_DIR=/usr/lib/aarch64-linux-gnu; \
    fi && \
    mkdir -p "${MULTIARCH_DIR}" && \
    ln -s /usr/local/hdf5/lib/libhdf5.so "${MULTIARCH_DIR}/libhdf5.so" && \
    ln -s /usr/local/hdf5/lib/libhdf5_hl.so "${MULTIARCH_DIR}/libhdf5_hl.so"

# Build h5py 3.1.0 as a binary wheel (bdist_wheel) instead of installing it
# directly from source. The wheel is kept in /tmp/wheels and installed into
# the system Python from there; the unpacked source tree is removed.
RUN mkdir -p /tmp/wheels && \
    pip3.7 install -U pip && \
    pip3.7 install "Cython<3" wheel && \
    unzip h5py-3.1.0.zip && \
    ( \
        cd h5py-3.1.0 && \
        HDF5_DIR=/usr/local/hdf5 python3.7 setup.py bdist_wheel -v && \
        cp dist/*.whl /tmp/wheels/ && \
        pip3.7 install /tmp/wheels/h5py-3.1.0-*.whl -v \
    ) && \
    rm -rf h5py-3.1.0

# 10. Add the Ascend driver library directories (under ASCEND_BASE) to the
#     front of the dynamic library search path.
ARG ASCEND_BASE=/usr/local/Ascend
ENV LD_LIBRARY_PATH=${ASCEND_BASE}/driver/lib64:${ASCEND_BASE}/driver/lib64/common:${ASCEND_BASE}/driver/lib64/driver:${LD_LIBRARY_PATH}

# 11. Download the CANN 9.0.0 installers for the build architecture:
#     the toolkit plus the 910b, A3 and 950 ops packages.
#     The build fails immediately when the architecture is not x86_64/aarch64
#     or when any single download fails, instead of continuing with missing
#     installers.
RUN ARCH="$(uname -m)" && \
    case "${ARCH}" in \
        x86_64|aarch64) ;; \
        *) echo "Unsupported architecture for CANN packages: ${ARCH}" >&2; exit 1 ;; \
    esac && \
    CANN_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%209.0.0" && \
    for pkg in toolkit 910b-ops A3-ops 950-ops; do \
        wget "${CANN_URL}/Ascend-cann-${pkg}_9.0.0_linux-${ARCH}.run" || exit 1; \
    done

# Install the same toolkit installer three times, once per target prefix
# (cann-A2, cann-A3, cann-A5), then add the matching ops package to each.

# Toolkit -> A2
RUN CANN_DIR=/usr/local/Ascend/cann-A2 && \
    mkdir -p "${CANN_DIR}" && \
    chmod +x Ascend-cann-toolkit_*.run && \
    bash Ascend-cann-toolkit_9.0.0*.run --quiet --install --install-path="${CANN_DIR}"

# Toolkit -> A3
RUN CANN_DIR=/usr/local/Ascend/cann-A3 && \
    mkdir -p "${CANN_DIR}" && \
    bash Ascend-cann-toolkit_9.0.0*.run --quiet --install --install-path="${CANN_DIR}"

# Toolkit -> A5
RUN CANN_DIR=/usr/local/Ascend/cann-A5 && \
    mkdir -p "${CANN_DIR}" && \
    bash Ascend-cann-toolkit_9.0.0*.run --quiet --install --install-path="${CANN_DIR}"

# 910b ops package -> A2
RUN CANN_DIR=/usr/local/Ascend/cann-A2 && \
    chmod +x Ascend-cann-910b-ops*.run && \
    bash Ascend-cann-910b-ops*.run --quiet --install --install-path="${CANN_DIR}"

# A3 ops package -> A3
RUN CANN_DIR=/usr/local/Ascend/cann-A3 && \
    chmod +x Ascend-cann-A3-ops*.run && \
    bash Ascend-cann-A3-ops*.run --quiet --install --install-path="${CANN_DIR}"

# 950 ops package -> A5
RUN CANN_DIR=/usr/local/Ascend/cann-A5 && \
    chmod +x Ascend-cann-950-ops*.run && \
    bash Ascend-cann-950-ops_9.0.0*.run --quiet --install --install-path="${CANN_DIR}"

# Make A2 the default CANN environment.
# -n stops ln from following an existing symlink-to-directory at the link
# path; without it the new link would be created inside the old target
# directory instead of replacing the link.
RUN ln -sfn /usr/local/Ascend/cann-A2 /usr/local/Ascend/latest && \
    ln -sfn /usr/local/Ascend/cann-A2/ascend-toolkit /usr/local/Ascend/ascend-toolkit

# Generate the CANN environment switch script /usr/local/set_cann_env.sh.
# The script takes a2/a3/a5 (either case), exports ASCEND_TOOLKIT_HOME and
# ASCEND_OPP_PATH for that install, re-points the /usr/local/Ascend/latest and
# /usr/local/Ascend/ascend-toolkit links, and sources the toolkit's set_env.sh.
# Each printf argument below becomes one line of the script; single quotes keep
# the $VARIABLES literal so they are expanded when the script runs, not now.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'case $1 in' \
        '  a2|A2)' \
        '    export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A2' \
        '    export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        '    ;;' \
        '  a3|A3)' \
        '    export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A3' \
        '    export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        '    ;;' \
        '  a5|A5)' \
        '    export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/cann-A5' \
        '    export ASCEND_OPP_PATH=$ASCEND_TOOLKIT_HOME/cann/opp' \
        '    ;;' \
        '  *)' \
        '    echo "Usage: source /usr/local/set_cann_env.sh [a2|a3|a5]"' \
        '    return 1' \
        '    ;;' \
        'esac' \
        'rm /usr/local/Ascend/latest' \
        'rm /usr/local/Ascend/ascend-toolkit' \
        'ln -sf $ASCEND_TOOLKIT_HOME /usr/local/Ascend/latest' \
        'ln -sf $ASCEND_TOOLKIT_HOME/ascend-toolkit /usr/local/Ascend/ascend-toolkit' \
        'source $ASCEND_TOOLKIT_HOME/ascend-toolkit/set_env.sh' \
        > /usr/local/set_cann_env.sh && \
    chmod +x /usr/local/set_cann_env.sh

# 12. Download the TensorFlow (1.15.0 and 2.6.5) and NPU plugin wheels for the
#     build architecture. TensorFlow wheels use the manylinux2010 tag on
#     x86_64 and manylinux2014 on aarch64; the NPU wheels are manylinux2014 on
#     both. Any failed download, or an unsupported architecture, fails the
#     build. Wheels for the other architecture found in the working directory
#     are removed afterwards.
RUN ARCH="$(uname -m)" && \
    case "${ARCH}" in \
        x86_64)  TF_TAG=manylinux2010_x86_64;  OTHER_ARCH=aarch64 ;; \
        aarch64) TF_TAG=manylinux2014_aarch64; OTHER_ARCH=x86_64 ;; \
        *) echo "Unsupported architecture for TensorFlow wheels: ${ARCH}" >&2; exit 1 ;; \
    esac && \
    TF_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindX/OpenSource/python/packages" && \
    NPU_URL="https://gitee.com/ascend/tensorflow/releases/download/tfa_v0.0.47_8.5.0" && \
    wget "${TF_URL}/tensorflow-1.15.0-cp37-cp37m-${TF_TAG}.whl" && \
    wget "${TF_URL}/tensorflow-2.6.5-cp37-cp37m-${TF_TAG}.whl" && \
    wget "${NPU_URL}/npu_bridge-1.15.0-py3-none-manylinux2014_${ARCH}.whl" && \
    wget "${NPU_URL}/npu_device-2.6.5-py3-none-manylinux2014_${ARCH}.whl" && \
    rm -f ./*_"${OTHER_ARCH}".whl

# 13. Stage the external RecSDK package. The user must supply the package that
#     matches the target framework under packages/ in the build context; no
#     further matching is done here.
COPY packages/tf_rec_v1_*.tar.gz /tmp/

# Core type (a2/a3/a5) passed to the RecSDK install when operators are
# installed; defaults to a2. Per the original note the package can detect the
# type automatically, and users may also override this value.
ARG CORE_TYPE="a2"

# 14. Install the TensorFlow-related Python packages and the Rec SDK into two
#     virtual environments: tf1 (TensorFlow 1.15.0) and tf2 (TensorFlow 2.6.5).
#
# CC/CXX point at the Open MPI compiler wrappers while these packages are
# installed (used for the mpi4py build). They are exported inside each RUN
# rather than set with ENV: every RUN starts a new shell, so a later
# `RUN unset CC` cannot undo an ENV, and the variables would otherwise leak
# into all following build steps and into the runtime environment.
#
# The per-architecture wheel installs are chained with && so that a failed
# TensorFlow install stops the build instead of being masked by the next
# command.

# tf1 virtual environment
RUN export CC=/usr/local/openmpi/bin/mpicc CXX=/usr/local/openmpi/bin/mpicxx && \
    python3.7 -m venv /opt/buildtools/tf1_env && \
    /opt/buildtools/tf1_env/bin/pip install -U pip && \
    /opt/buildtools/tf1_env/bin/pip install /tmp/wheels/h5py-3.1.0-*.whl && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        /opt/buildtools/tf1_env/bin/pip install tensorflow-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl && \
        /opt/buildtools/tf1_env/bin/pip install npu_bridge-1.15.0-py3-none-manylinux2014_x86_64.whl; \
    else \
        /opt/buildtools/tf1_env/bin/pip install tensorflow-1.15.0-cp37-cp37m-manylinux2014_aarch64.whl && \
        /opt/buildtools/tf1_env/bin/pip install npu_bridge-1.15.0-py3-none-manylinux2014_aarch64.whl; \
    fi && \
    /opt/buildtools/tf1_env/bin/pip install tf_slim mpi4py && \
    CORE_TYPE=${CORE_TYPE} /opt/buildtools/tf1_env/bin/pip install /tmp/tf_rec_v1*.tar.gz && \
    rm -rf /root/.cache/pip

# tf2 virtual environment
RUN export CC=/usr/local/openmpi/bin/mpicc CXX=/usr/local/openmpi/bin/mpicxx && \
    python3.7 -m venv /opt/buildtools/tf2_env && \
    /opt/buildtools/tf2_env/bin/pip install -U pip && \
    /opt/buildtools/tf2_env/bin/pip install /tmp/wheels/h5py-3.1.0-*.whl && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        /opt/buildtools/tf2_env/bin/pip install tensorflow-2.6.5-cp37-cp37m-manylinux2010_x86_64.whl && \
        /opt/buildtools/tf2_env/bin/pip install npu_device-2.6.5-py3-none-manylinux2014_x86_64.whl; \
    else \
        /opt/buildtools/tf2_env/bin/pip install tensorflow-2.6.5-cp37-cp37m-manylinux2014_aarch64.whl && \
        /opt/buildtools/tf2_env/bin/pip install npu_device-2.6.5-py3-none-manylinux2014_aarch64.whl; \
    fi && \
    /opt/buildtools/tf2_env/bin/pip install tf_slim mpi4py && \
    CORE_TYPE=${CORE_TYPE} /opt/buildtools/tf2_env/bin/pip install /tmp/tf_rec_v1*.tar.gz && \
    rm -rf /root/.cache/pip

# 15. Install the build-time dependency packages into the system Python
#     (outside the virtual environments): the npu_bridge and npu_device wheels
#     for the build architecture (x86_64, otherwise aarch64), plus packaging
#     and numpy. All installs are chained with && so a failure in any of them
#     fails the build instead of being masked by the following command.
RUN pip3.7 install -U pip && \
    if [ "$(uname -m)" = "x86_64" ]; then \
        WHL_ARCH=x86_64; \
    else \
        WHL_ARCH=aarch64; \
    fi && \
    pip3.7 install "npu_bridge-1.15.0-py3-none-manylinux2014_${WHL_ARCH}.whl" --force-reinstall && \
    pip3.7 install "npu_device-2.6.5-py3-none-manylinux2014_${WHL_ARCH}.whl" --force-reinstall && \
    pip3.7 install packaging numpy && \
    rm -rf /root/.cache/pip

# 16. Clean the temporary working directory (WORKDIR is /tmp).
#     The ./* glob does not match dot-files, so hidden entries are kept.
#     NOTE(review): files created by earlier RUN/COPY instructions remain in
#     those earlier layers; deleting them here hides them from the final
#     filesystem but does not reduce the image size.
RUN rm -rf ./*