mindcluster-deploy/infer-operator-deploy-tool/config/user_config.json-代码预览-MindCluster-Samples:基于MindCluster的示例代码项目 - AtomGit

ascend-robot: infer-operator-tool yaml生成部分
{
    "deploy_config": {
        "namespace": "default",
        "job_name": "qwen",
        "infer_service_num": 1,
        "prefill": {
            "hardware_type": "module-910b-8",
            "instance_count": 1,
            "single_instance_pod_num": 1,
            "single_pod_npu_num": 0,
            "image": "vllm-ascend:latest",
            "env": {
                "env1": "value1"
            },
            "labels": {
                "label1": "qwen-prefill"
            },
            "annotations": {
                "anno1": "qwen-prefill"
            }
        },
        "decode": {
            "hardware_type": "module-910b-8",
            "instance_count": 1,
            "single_instance_pod_num": 1,
            "single_pod_npu_num": 0,
            "image": "vllm-ascend:latest",
            "env": {
                "env1": "value1"
            },
            "labels": {
                "label1": "qwen-decode"
            },
            "annotations": {
                "anno1": "qwen-decode"
            }
        },
        "router": {
            "hardware_type": "module-910b-8",
            "instance_count": 1,
            "single_instance_pod_num": 1,
            "single_pod_npu_num": 0,
            "image": "vllm-ascend:latest",
            "env": {
                "env1": "value1"
            }
        }
    },
    "engine_common_config": {
        "deploy_type": "pd_separate",
        "engine_type": "vllm",
        "serve_name": "qwen-service",
        "model_path": "/model/qwen3_30B",
        "prefill_dp_size": 2,
        "prefill_tp_size": 1,
        "decode_dp_size": 2,
        "decode_tp_size": 1,
        "enable_ep": false,
        "server_port": 8080,
        "dp_rpc_port": 10000
    },
    "prefill_engine_config": {
    },
    "decode_engine_config": {
    },
    "router_config": {
        "port": 8000
    }
}