{
"Version": "1.0.0",
"LogConfig": {
"logLevel": "Info",
"logFileSize": 20,
"logFileNum": 20,
"logPath": "logs/mindservice.log"
},
"BackendConfig": {
"backendName": "mindieservice_llm_engine",
"modelInstanceNumber": 1,
"npuDeviceIds": [
[
0
]
],
"tokenizerProcessNumber": 8,
"multiNodesInferEnabled": false,
"multiNodesInferPort": 1120,
"interNodeTLSEnabled": false,
"interNodeTlsCaFile": "security/grpc/ca/ca.pem",
"interNodeTlsCert": "security/grpc/certs/server.pem",
"interNodeTlsPk": "security/grpc/keys/server.key.pem",
"interNodeTlsCrl": "security/grpc/certs/server_crl.pem",
"ModelDeployConfig": {
"maxSeqLen": 2560,
"maxInputTokenLen": 2048,
"truncation": 0,
"ModelConfig": [
{
"modelInstanceType": "StandardMock",
"modelName": "llama_65b",
"modelWeightPath": "/home/data/Qwen2.5-7B-Instruct",
"worldSize": 1,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "ms",
"trustRemoteCode": false
}
]
},
"ScheduleConfig": {
"templateType": "Standard",
"templateName": "Standard_LLM",
"cacheBlockSize": 128,
"maxPrefillBatchSize": 50,
"maxPrefillTokens": 8192,
"prefillTimeMsPerReq": 150,
"prefillPolicyType": 0,
"decodeTimeMsPerReq": 50,
"decodePolicyType": 0,
"maxBatchSize": 200,
"maxIterTimes": 1,
"maxPreemptCount": 0,
"supportSelectBatch": false,
"maxQueueDelayMicroseconds": 5000
}
}
}