# Reusable workflow: invoked by other workflows through `workflow_call`.
name: Integration Tests AMD
on:
  workflow_call:
    inputs:
      # String input that the job decodes with fromJson() to obtain the
      # values of the `runner` matrix dimension.
      matrix:
        type: string
        required: true
jobs:
  # Builds Triton inside a container and runs its test suites, once per
  # `runner` value decoded from the caller-supplied `matrix` input.
  integration-tests-amd:
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 45
    strategy:
      matrix:
        runner: ${{ fromJson(inputs.matrix) }}
        # Each entry supplies the container image and container options for
        # the matrix combination with the same `runner` value (GitHub creates
        # a new combination when no existing one matches).
        include:
          - image: rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
            runner: ["self-hosted", "gfx90a"]
            options: >-
              --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
              --volume /home/runner/.triton:/github/home/.triton
          - image: rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
            runner: ["amd-gfx942"]
            options: >-
              --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
              --env-file /etc/podinfo/gha-gpu-isolation-settings
              --volume /home/runner/.triton:/github/home/.triton
          - image: rocm/7.0-preview:rocm7.0_preview_ubuntu22.04_llama2_70b_training_mlperf_mi35X_prealpha
            runner: ["amd-gfx950"]
            options: >-
              --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
              --env-file /etc/podinfo/gha-gpu-isolation-settings
              --volume /home/runner/.triton:/github/home/.triton
    env:
      # NOTE(review): matrix.runner[1] presumably evaluates to an empty value
      # for single-label runner sets such as ["amd-gfx942"] — confirm that
      # consumers of RUNNER_TYPE expect this.
      RUNNER_TYPE: ${{ matrix.runner[1] }}
      TRITON_BUILD_WITH_CCACHE: "true"
      TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
      TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
      # Quoted so every env value is an explicit string, like its siblings.
      TRITON_DISABLE_LINE_INFO: "1"
      PROTON_SKIP_PC_SAMPLING_TEST: "1"
      PYTHON: "python3"
      CCACHE_COMPRESS: "true"
    container:
      image: ${{ matrix.image }}
      options: ${{ matrix.options }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: 'true'
      # Derives the three cache-key components from version files in cmake/
      # and publishes them as step outputs (llvm, nvidia, json).
      - name: Compute cache keys
        id: cache-key
        run: |
          llvm_file="cmake/llvm-hash.txt"
          nvidia_file="cmake/nvidia-toolchain-version.json"
          json_file="cmake/json-version.txt"
          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
            echo "Error: Required dependency files are missing."
            exit 1
          fi
          echo "llvm=$(cut -c 1-8 "$llvm_file")" >> "$GITHUB_OUTPUT"
          echo "nvidia=$(sha256sum "$nvidia_file" | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"
          echo "json=$(cat "$json_file")" >> "$GITHUB_OUTPUT"
        # bash is requested explicitly because the script uses [[ ... ]].
        shell: bash
      - name: Cache build dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.triton/llvm
            ~/.triton/nvidia
            ~/.triton/json
          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
      # Prints the size of the Triton and ccache directories before the build.
      - name: Inspect cache directories
        run: |
          mkdir -p ~/.triton
          du -h -d 1 ~/.triton
          mkdir -p ~/.ccache
          du -h -d 1 ~/.ccache
      - name: Update compiler to clang
        run: |
          # A plain `export` only lives for this step's shell; appending to
          # GITHUB_ENV makes CC/CXX visible to the steps that follow.
          echo "CC=/usr/bin/clang" >> "$GITHUB_ENV"
          echo "CXX=/usr/bin/clang++" >> "$GITHUB_ENV"
      # ccache statistics are reset before the build on every runner except
      # amd-gfx950 (the reason for the exception is not visible in this file).
      - name: Install Triton
        id: amd-install-triton
        run: |
          echo "PATH is '$PATH'"
          pip uninstall -y triton pytorch-triton-rocm
          if [ "${{ matrix.runner[0] }}" != "amd-gfx950" ]; then
            ccache --zero-stats
          fi
          make dev-install
      - name: CCache Stats
        if: ${{ matrix.runner[0] != 'amd-gfx950' }}
        run: ccache --print-stats
      - name: Run lit tests
        run: make test-lit
      - name: Run C++ unittests
        run: make test-cpp
      # Runs the gluon, tutorial, AMD third-party and unit test suites, then
      # the instrumentation and line-info tests with line info re-enabled;
      # triton_kernels tests run only on amd-gfx950 / amd-gfx942.
      - name: Run python tests on AMD
        run: |
          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
            # `exit -1` is not a valid status in POSIX sh; use a plain failure.
            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" >&2
            exit 1
          fi
          pytest --capture=tee-sys -rfs -n 8 python/test/gluon/
          pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
          pytest --capture=tee-sys -rfs -n 8 third_party/amd/python/test/ \
                 --ignore=third_party/amd/python/test/test_scalarize_packed_fops.py \
                 --ignore=third_party/amd/python/test/test_address_sanitizer.py
          TRITON_ALWAYS_COMPILE=1 pytest --capture=tee-sys -rfs third_party/amd/python/test/test_scalarize_packed_fops.py
          cd python/test/unit
          pytest --capture=tee-sys -rfs -n 12 \
                 --ignore=blackwell \
                 --ignore=cuda \
                 --ignore=instrumentation \
                 --ignore=language/test_line_info.py \
                 --ignore=test_debug.py
          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH="${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so" \
          pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
          if [ "${{ matrix.runner[0] }}" = "amd-gfx950" ]; then
            TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py -k "not test_line_info_ir_source"
          else
            TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
          fi
          if [ "${{ matrix.runner[0] }}" = "amd-gfx950" ] || [ "${{ matrix.runner[0] }}" = "amd-gfx942" ]; then
            cd ../../triton_kernels/
            python3 -m pytest -s -n 12 tests/
          fi
      - name: Run distributed tests
        run: |
          make test-distributed
      # Currently disabled via `if: false`.
      - name: Run asan tests on AMD
        if: false
        run: |
          cd third_party/amd/python/test/
          ulimit -s 1024
          export PATH=$(find ~/.triton/llvm -name llvm-symbolizer -printf '%h\n'):$PATH
          export LD_LIBRARY_PATH=$(find /opt -name libclang_rt.asan-x86_64.so -printf '%h\n'):$LD_LIBRARY_PATH
          # Patterns are quoted so the shell hands them to find unexpanded.
          export LD_LIBRARY_PATH=$(find /opt -type d -wholename '*lib/llvm/lib/asan'):$LD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$(find /usr -name libcaffe2_nvrtc.so -printf '%h\n'):$LD_LIBRARY_PATH
          export CLANG_ASAN_LIB=$(find /opt -name libclang_rt.asan-x86_64.so)
          export HIP_ASAN_LIB=$(find /opt -wholename '*lib/asan/libamdhip64.so')
          ASAN_OPTIONS=detect_leaks=0,alloc_dealloc_mismatch=0 \
          LD_PRELOAD=$CLANG_ASAN_LIB:$HIP_ASAN_LIB python3 -m pytest -s test_address_sanitizer.py
      - name: Run regression tests
        run: |
          # Reenable test_functional_regression.py once it's fixed
          cd python/test/regression
          python3 -m pytest -s -n 8 ./test_cast_matmul.py
      - name: Run microbenchmark tests
        run: |
          python3 python/test/microbenchmark/launch_overhead.py
      # The device-visibility variables are cleared before Proton runs;
      # amd-gfx950 additionally deselects test_instrument_exec.
      - name: Run Proton tests
        run: |
          unset HIP_VISIBLE_DEVICES
          unset ROCR_VISIBLE_DEVICES
          if [ "${{ matrix.runner[0] }}" = "amd-gfx950" ]; then
            python3 -m pytest -s -n 8 third_party/proton/test -k "not test_instrument_exec"
          else
            make test-proton
          fi
      # Same report as the earlier step of this name, taken after the tests.
      - name: Inspect cache directories
        run: |
          mkdir -p ~/.triton
          du -h -d 1 ~/.triton
          mkdir -p ~/.ccache
          du -h -d 1 ~/.ccache
      # Runs even when earlier steps fail, removing ~/.triton/cache and
      # ~/.ccache.
      - name: Clean up caches
        if: always()
        run: |
          rm -rf ~/.triton/cache
          rm -rf ~/.ccache