{
  "Version": "1.0.0",
  "LogConfig": {
    "logLevel": "Info",
    "logFileSize": 20,
    "logFileNum": 20,
    "logPath": "logs/mindservice.log"
  },
  "BackendConfig": {
    "backendName": "mindieservice_llm_engine",
    "modelInstanceNumber": 1,
    "npuDeviceIds": [
      [0, 1, 2, 3]
    ],
    "tokenizerProcessNumber": 8,
    "multiNodesInferEnabled": true,
    "multiNodesInferPort": 1120,
    "interNodeTLSEnabled": true,
    "interNodeTlsCaPath": "security/grpc/ca/",
    "interNodeTlsCaFiles": ["ca.pem"],
    "interNodeTlsCert": "security/grpc/certs/server.pem",
    "interNodeTlsPk": "security/grpc/keys/server.key.pem",
    "interNodeTlsCrlPath": "security/grpc/certs/",
    "interNodeTlsCrlFiles": ["server_crl.pem"],
    "ModelDeployConfig": {
      "maxSeqLen": 2560,
      "maxInputTokenLen": 2048,
      "truncation": 0,
      "ModelConfig": [
        {
          "modelInstanceType": "StandardMock",
          "modelName": "llama_65b",
          "modelWeightPath": "/home/data/Meta-Llama-3-8B/",
          "worldSize": 4,
          "cpuMemSize": 5,
          "npuMemSize": -1,
          "backendType": "ms",
          "trustRemoteCode": false,
          "plugin_params": "{\"plugin_type\":\"prefix_cache\"}"
        }
      ],
      "LoraModules": [
        {
          "name": "test",
          "path": "test",
          "baseModelName": "test"
        }
      ]
    },
    "ScheduleConfig": {
      "templateType": "Standard",
      "templateName": "Standard_LLM",
      "cacheBlockSize": 128,
      "maxPrefillBatchSize": 50,
      "maxPrefillTokens": 8192,
      "prefillTimeMsPerReq": 150,
      "prefillPolicyType": 0,
      "decodeTimeMsPerReq": 50,
      "decodePolicyType": 0,
      "maxBatchSize": 200,
      "maxIterTimes": 512,
      "maxPreemptCount": 0,
      "supportSelectBatch": false,
      "maxQueueDelayMicroseconds": 5000,
      "enablePrefixCache": true
    }
  }
}