# Jinja2 template for a multi-node vLLM Ascend LeaderWorkerSet.
#
# Template variables (all optional, defaults shown in the expressions below):
#   lws_name, replicas, size, image, config_file_path, config_base_path,
#   log_prefix, fail_tag, is_pr_test, vllm_ascend_ref, vllm_ascend_remote_url,
#   benchmark_job_name, runner, npu_per_node, pvc_name
#
# Every templated *string* scalar is double-quoted so the rendered value stays
# a YAML string even when it is empty or looks like a number/boolean (e.g. an
# all-digit commit SHA). Integer fields (replicas, size, NPU count) are left
# unquoted on purpose.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: "{{ lws_name | default("vllm") }}"
  namespace: vllm-project
spec:
  replicas: {{ replicas | default(1) }}
  leaderWorkerTemplate:
    size: {{ size | default(2) }}
    # "None" is a string enum value of the LWS API here, not a YAML null.
    restartPolicy: "None"
    leaderTemplate:
      metadata:
        labels:
          # Matched by the selector of the "<lws_name>-leader" Service.
          role: leader
      spec:
        # Allow scheduling onto nodes tainted dedicated=night:NoSchedule.
        tolerations:
          - key: "dedicated"
            operator: "Equal"
            value: "night"
            effect: "NoSchedule"
        containers:
          - name: vllm-leader
            imagePullPolicy: Always
            image: "{{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default("DeepSeek-V3.yaml") }}"
              - name: CONFIG_BASE_PATH
                value: "{{ config_base_path | default("") }}"
              - name: LOG_PREFIX
                value: "{{ log_prefix | default("/root/.cache/ascend-logs") }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default("FAIL_TAG") }}"
              - name: IS_PR_TEST
                value: "{{ is_pr_test | default("false") }}"
              - name: VLLM_ASCEND_REF
                value: "{{ vllm_ascend_ref | default("main") }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}"
              - name: BENCHMARK_JOB_NAME
                value: "{{ benchmark_job_name | default("") }}"
              - name: VLLM_CI_RUNNER
                value: "{{ runner | default("linux-aarch64-a3-0") }}"
              # Same template variable as VLLM_ASCEND_REF.
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default("main") }}"
            # The entry script lives on the shared volume mounted at /root/.cache.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              # Port targeted by the "<lws_name>-leader" Service.
              - containerPort: 8080
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm.
          # NOTE(review): sizeLimit equals the container memory limit (512Gi);
          # confirm that leaves enough headroom for the process itself.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 512Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: "{{ pvc_name | default("nv-action-vllm-benchmarks-v2") }}"
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        # Allow scheduling onto nodes tainted dedicated=night:NoSchedule.
        tolerations:
          - key: "dedicated"
            operator: "Equal"
            value: "night"
            effect: "NoSchedule"
        containers:
          - name: vllm-worker
            imagePullPolicy: Always
            image: "{{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default("DeepSeek-V3.yaml") }}"
              - name: CONFIG_BASE_PATH
                value: "{{ config_base_path | default("") }}"
              - name: LOG_PREFIX
                value: "{{ log_prefix | default("/root/.cache/ascend-logs") }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default("FAIL_TAG") }}"
              - name: IS_PR_TEST
                value: "{{ is_pr_test | default("false") }}"
              - name: VLLM_ASCEND_REF
                value: "{{ vllm_ascend_ref | default("main") }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}"
              - name: BENCHMARK_JOB_NAME
                value: "{{ benchmark_job_name | default("") }}"
              - name: VLLM_CI_RUNNER
                value: "{{ runner | default("linux-aarch64-a3-0") }}"
              # Mirrors the leader container so both pods run run.sh with the
              # same environment.
              # NOTE(review): confirm run.sh expects this variable on workers.
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default("main") }}"
            # The entry script lives on the shared volume mounted at /root/.cache.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm.
          # NOTE(review): sizeLimit equals the container memory limit (512Gi);
          # confirm that leaves enough headroom for the process itself.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 512Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: "{{ pvc_name | default("nv-action-vllm-benchmarks-v2") }}"
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service in front of the leader pod(s) of the LeaderWorkerSet named
# by the lws_name template variable (default "vllm"). It forwards TCP 8080 to
# container port 8080 on pods that carry both selector labels below.
# Templated strings are double-quoted so the rendered values stay YAML strings.
apiVersion: v1
kind: Service
metadata:
  name: "{{ lws_name | default("vllm") }}-leader"
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    leaderworkerset.sigs.k8s.io/name: "{{ lws_name | default("vllm") }}"
    # Label set in the leader pod template of the LeaderWorkerSet.
    role: leader
  type: ClusterIP