---
# Volcano Job with a single-replica task whose container runs `npu-smi info`;
# going by the job name, it is meant as a quick check of the training
# environment.

# Fixed value: the Volcano batch API must be used.
apiVersion: batch.volcano.sh/v1alpha1
# Job is the only kind supported at present.
kind: Job
metadata:
  name: train-env-quick-validation
spec:
  # Use 1 for a single-node job and N for an N-node distributed job.
  minAvailable: 1
  # Hand scheduling of this job to the Volcano scheduler.
  schedulerName: volcano
  maxRetry: 1
  queue: default
  tasks:
    - name: default-test
      # Use 1 for a single node and N for N nodes. In an N-node job the NPU
      # count under requests is 8.
      replicas: 1
      template:
        metadata:
          labels:
            app: train
        spec:
          restartPolicy: OnFailure
          containers:
            - name: train-env-quick-validation
              # Training framework image; replace it as needed.
              image: ascend-k8sdeviceplugin:v3.0.0
              imagePullPolicy: IfNotPresent
              command:
                - "/bin/bash"
                - "-c"
                - "npu-smi info"
              resources:
                requests:
                  # Number of NPUs required (at most 8). Other resources such
                  # as memory and CPU can be added below this line.
                  huawei.com/Ascend910: 1
                limits:
                  # Must stay equal to the value under requests.
                  huawei.com/Ascend910: 1
              volumeMounts:
                # Each mount exposes a host path (see volumes) at the same
                # path inside the container.
                - name: ascend-driver
                  mountPath: /usr/local/Ascend/driver
                - name: npu-smi
                  mountPath: /usr/local/bin/npu-smi
          volumes:
            - name: ascend-driver
              hostPath:
                path: /usr/local/Ascend/driver
            - name: npu-smi
              hostPath:
                path: /usr/local/bin/npu-smi