# LeaderWorkerSet manifest template (Jinja2 is rendered first, then the result
# is parsed as YAML). Pods are built from the leader and worker templates
# nested under spec.leaderWorkerTemplate.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  # Quoted so that a name such as "on", "true" or "123" stays a string after
  # rendering instead of being retyped by the YAML parser.
  name: "{{ lws_name | default("vllm") }}"
  namespace: vllm-project
spec:
  # replicas and size are integer fields, so they are deliberately unquoted.
  replicas: {{ replicas | default(1) }}
  leaderWorkerTemplate:
    size: {{ size | default(2) }}
    restartPolicy: None
    # Pod template for the leader pod. It runs /root/.cache/tests/run.sh from
    # the shared PVC and listens on container port 8080.
    leaderTemplate:
      metadata:
        labels:
          # Matched by the selector of the "-leader" Service in this file.
          role: leader
      spec:
        # Allows scheduling onto nodes tainted dedicated=night:NoSchedule.
        tolerations:
          - key: "dedicated"
            operator: "Equal"
            value: "night"
            effect: "NoSchedule"
        containers:
          - name: vllm-leader
            imagePullPolicy: Always
            image: "{{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}"
            # Every templated env value is quoted: EnvVar.value must be a
            # string, and an unquoted render such as 0.11, 8329425, true or an
            # empty expansion would be parsed as a number, boolean or null.
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default("DeepSeek-V3.yaml") }}"
              - name: CONFIG_BASE_PATH
                value: "{{ config_base_path | default("") }}"
              - name: LOG_PREFIX
                value: "{{ log_prefix | default("/root/.cache/ascend-logs") }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default("FAIL_TAG") }}"
              - name: IS_PR_TEST
                value: "{{ is_pr_test | default("false") }}"
              - name: VLLM_ASCEND_REF
                value: "{{ vllm_ascend_ref | default("main") }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}"
              # Defaults to the empty string; the quotes keep it from
              # rendering as a bare (null) value.
              - name: BENCHMARK_JOB_NAME
                value: "{{ benchmark_job_name | default("") }}"
              - name: VLLM_CI_RUNNER
                value: "{{ runner | default("linux-aarch64-a3-0") }}"
              # Takes the same template variable as VLLM_ASCEND_REF.
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default("main") }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # Readiness probe is currently disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              # Shared PVC; run.sh is read from this path.
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm.
          # NOTE(review): sizeLimit equals the container memory limit (512Gi);
          # memory-backed emptyDir usage is charged to the container's memory,
          # so confirm this sizing is intended.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 512Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: "{{ pvc_name | default("nv-action-vllm-benchmarks-v2") }}"
          # Host directory exposed at the same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    # Pod template for the worker pods. It runs the same
    # /root/.cache/tests/run.sh entry point as the leader, with no label and
    # no container port declared.
    workerTemplate:
      spec:
        # Allows scheduling onto nodes tainted dedicated=night:NoSchedule.
        tolerations:
          - key: "dedicated"
            operator: "Equal"
            value: "night"
            effect: "NoSchedule"
        containers:
          - name: vllm-worker
            imagePullPolicy: Always
            image: "{{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}"
            # Every templated env value is quoted: EnvVar.value must be a
            # string, and an unquoted render such as 0.11, 8329425, true or an
            # empty expansion would be parsed as a number, boolean or null.
            # NOTE(review): the leader container also sets VLLM_ASCEND_VERSION;
            # it is not set here - confirm that is intentional.
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default("DeepSeek-V3.yaml") }}"
              - name: CONFIG_BASE_PATH
                value: "{{ config_base_path | default("") }}"
              - name: LOG_PREFIX
                value: "{{ log_prefix | default("/root/.cache/ascend-logs") }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default("FAIL_TAG") }}"
              - name: IS_PR_TEST
                value: "{{ is_pr_test | default("false") }}"
              - name: VLLM_ASCEND_REF
                value: "{{ vllm_ascend_ref | default("main") }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}"
              # Defaults to the empty string; the quotes keep it from
              # rendering as a bare (null) value.
              - name: BENCHMARK_JOB_NAME
                value: "{{ benchmark_job_name | default("") }}"
              - name: VLLM_CI_RUNNER
                value: "{{ runner | default("linux-aarch64-a3-0") }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              # Shared PVC; run.sh is read from this path.
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm.
          # NOTE(review): sizeLimit equals the container memory limit (512Gi);
          # memory-backed emptyDir usage is charged to the container's memory,
          # so confirm this sizing is intended.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 512Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: "{{ pvc_name | default("nv-action-vllm-benchmarks-v2") }}"
          # Host directory exposed at the same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service that forwards TCP port 8080 to port 8080 of the pods
# labelled role=leader that also carry the LeaderWorkerSet name label.
apiVersion: v1
kind: Service
metadata:
  name: "{{ lws_name | default("vllm") }}-leader"
  namespace: vllm-project
spec:
  type: ClusterIP
  selector:
    # Label values must be strings; the quotes keep a name such as "123" or
    # "on" from being retyped by the YAML parser after rendering.
    leaderworkerset.sigs.k8s.io/name: "{{ lws_name | default("vllm") }}"
    role: leader
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080