# Reusable workflow: it only runs when another workflow invokes it through
# `workflow_call` and supplies both inputs below.
name: Smart E2E
on:
  workflow_call:
    inputs:
      # Used as the `ref` when checking out vllm-project/vllm.
      vllm:
        description: 'The vllm commit hash or tag to test.'
        required: true
        type: string
      # Parsed with fromJSON() to build the job matrix (one leg per element).
      test_groups:
        description: 'JSON array of test groups from determine_smart_e2e_scope.py'
        required: true
        type: string
# Default shell for every `run` step that does not set its own `shell`:
# a login bash (-l) that exits on the first failing command (-e).
# `{0}` is the placeholder GitHub replaces with the generated script path.
defaults:
  run:
    shell: 'bash -el {0}'
# GITHUB_TOKEN scopes requested for the jobs of this workflow.
# NOTE(review): the steps of smart-ut-test only check out code and run tests;
# confirm whether the two write scopes are still needed.
permissions:
  contents: read
  issues: write
  pull-requests: write
jobs:
  # Runs the selected tests once per element of the `test_groups` input.
  # Each matrix element (`matrix.group`) supplies: runner, image_tag, npu_type,
  # num_npus and tests. Steps branch on whether `npu-smi info` succeeds inside
  # the container (output `has_npu` of the first step).
  smart-ut-test:
    name: smart-ut (${{ matrix.group.npu_type }} x${{ matrix.group.num_npus }})
    strategy:
      # Let the remaining groups finish even if one of them fails.
      fail-fast: false
      matrix:
        group: ${{ fromJSON(inputs.test_groups) }}
    runs-on: ${{ matrix.group.runner }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:${{ matrix.group.image_tag }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
        MAX_JOBS: 4
        UV_INDEX_URL: http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
        UV_EXTRA_INDEX_URL: "https://repo.huaweicloud.com/ascend/repos/pypi"
        UV_INDEX_STRATEGY: unsafe-best-match
        UV_INSECURE_HOST: cache-service.nginx-pypi-cache.svc.cluster.local
        UV_HTTP_TIMEOUT: 120
        UV_NO_CACHE: 1
        UV_SYSTEM_PYTHON: 1
    steps:
      - name: Check NPU availability
        id: check_npu
        run: |
          # has_npu=true only when `npu-smi info` exits successfully; a missing
          # binary or a failing call both yield has_npu=false.
          if npu-smi info > /dev/null 2>&1; then
            echo "has_npu=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_npu=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Install packages
        run: |
          # Point apt and pip at the in-cluster cache before installing anything.
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
          pip install uv

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          # The PyTorch CPU wheel index is appended only when no NPU was detected.
          VLLM_TARGET_DEVICE=empty uv pip install . ${{ steps.check_npu.outputs.has_npu == 'false' && '--extra-index-url https://download.pytorch.org/whl/cpu' || '' }}
          pip uninstall -y triton

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Get csrc hash
        id: get_csrc_hash
        run: |
          # Digest over the native-build inputs; it is the last component of the
          # cache key below. NUL-delimited names and a byte-wise sort keep the
          # digest independent of the locale and safe for unusual file names.
          CSRC_HASH=$(find ./csrc ./setup.py ./CMakeLists.txt ./cmake \
            -type f -not -path '*/.*' -print0 | LC_ALL=C sort -z | xargs -0 sha256sum | sha256sum | awk '{print $1}')
          echo "CSRC_HASH=$CSRC_HASH" >> "$GITHUB_OUTPUT"

      - name: Cache vllm-ascend csrc
        id: cache-csrc
        if: ${{ steps.check_npu.outputs.has_npu == 'true' }}
        uses: runs-on/cache@v4
        with:
          path: |
            vllm_ascend/_cann_ops_custom
            vllm_ascend/*.so
            vllm_ascend/lib
            vllm_ascend/include
          # Exact-match only (no restore-keys): the install step skips kernel
          # compilation whenever a restored *.so is present, so the cached
          # artifacts must come from the same image and the same csrc hash.
          key: vllm-ascend-build-v1-${{ runner.os }}-swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:${{ matrix.group.image_tag }}-${{ steps.get_csrc_hash.outputs.CSRC_HASH }}

      - name: Install vllm-project/vllm-ascend with device
        if: ${{ steps.check_npu.outputs.has_npu == 'true' }}
        env:
          # Passed through the environment instead of being pasted into the script.
          NUM_NPUS: ${{ matrix.group.num_npus }}
        run: |
          # Overrides the container-level MAX_JOBS for this step only.
          # `:?` aborts the step if the matrix did not provide num_npus.
          export MAX_JOBS=$(( ${NUM_NPUS:?matrix.group.num_npus is empty} * 23 ))
          pip install uc-manager
          uv pip install -r requirements-dev.txt
          uv pip install --force-reinstall --no-deps triton-ascend==3.2.1
          # A *.so directly under vllm_ascend/ means prebuilt kernels are already
          # present (e.g. restored by the cache step), so skip compiling them.
          if find vllm_ascend -maxdepth 1 -name '*.so' -type f 2>/dev/null | grep -q .; then
            COMPILE_CUSTOM_KERNELS=0 uv pip install -e .
          else
            uv pip install -e .
          fi

      - name: Install vllm-project/vllm-ascend no device
        if: ${{ steps.check_npu.outputs.has_npu == 'false' }}
        env:
          SOC_VERSION: ascend910b1
          COMPILE_CUSTOM_KERNELS: 0
        run: |
          pip install uc-manager
          # `${VAR:+...}` avoids a leading empty entry (= current directory) when
          # LD_LIBRARY_PATH is unset or empty.
          export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          pip install . --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://repo.huaweicloud.com/ascend/repos/pypi
          pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://repo.huaweicloud.com/ascend/repos/pypi

      - name: Run smart UT with device
        if: ${{ steps.check_npu.outputs.has_npu == 'true' }}
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          # Matrix values reach the script as environment variables so that their
          # content is never parsed as shell source.
          NPU_TYPE: ${{ matrix.group.npu_type }}
          NUM_NPUS: ${{ matrix.group.num_npus }}
          TEST_TARGETS: ${{ matrix.group.tests }}
        run: |
          # Without targets pytest would collect everything it can find instead
          # of the selected subset, so treat an empty list as an error.
          if [ -z "${TEST_TARGETS//[[:space:]]/}" ]; then
            echo "::error::matrix.group.tests is empty; refusing to run pytest without explicit targets"
            exit 1
          fi
          echo -e "\033[1;34m=== TEST INFO ===\033[0m"
          echo -e " \033[33mDevice:\033[0m ${NPU_TYPE}"
          if [ "${NPU_TYPE}" != "cpu" ]; then
            echo -e " \033[33mNPU count:\033[0m ${NUM_NPUS}"
          fi
          echo -e " \033[33mTargets:\033[0m"
          # Unquoted on purpose: TEST_TARGETS is a whitespace-separated list.
          for target in ${TEST_TARGETS}; do
            echo -e " \033[32m-\033[0m ${target}"
          done
          echo -e "\033[1;34m====================\033[0m"
          pytest -sv --color=yes ${TEST_TARGETS}

      - name: Run smart UT without device
        if: ${{ steps.check_npu.outputs.has_npu == 'false' }}
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
          # Matrix values reach the script as environment variables so that their
          # content is never parsed as shell source.
          NPU_TYPE: ${{ matrix.group.npu_type }}
          NUM_NPUS: ${{ matrix.group.num_npus }}
          TEST_TARGETS: ${{ matrix.group.tests }}
        # Overrides the workflow default shell (`bash -el {0}`) for this step.
        shell: bash
        run: |
          # `${VAR:+...}` avoids a leading empty entry (= current directory) when
          # LD_LIBRARY_PATH is unset or empty.
          export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          # Without targets pytest would collect everything it can find instead
          # of the selected subset, so treat an empty list as an error.
          if [ -z "${TEST_TARGETS//[[:space:]]/}" ]; then
            echo "::error::matrix.group.tests is empty; refusing to run pytest without explicit targets"
            exit 1
          fi
          echo -e "\033[1;34m=== TEST INFO ===\033[0m"
          echo -e " \033[33mDevice:\033[0m ${NPU_TYPE}"
          if [ "${NPU_TYPE}" != "cpu" ]; then
            echo -e " \033[33mNPU count:\033[0m ${NUM_NPUS}"
          fi
          echo -e " \033[33mTargets:\033[0m"
          # Unquoted on purpose: TEST_TARGETS is a whitespace-separated list.
          for target in ${TEST_TARGETS}; do
            echo -e " \033[32m-\033[0m ${target}"
          done
          echo -e "\033[1;34m====================\033[0m"
          pytest -sv --color=yes ${TEST_TARGETS}