{
"Version" : "1.0.0",
"ServerConfig" :
{
"ipAddress" : "127.0.0.1",
"managementIpAddress" : "127.0.0.2",
"port" : 1025,
"managementPort" : 1026,
"metricsPort" : 1027,
"allowAllZeroIpListening" : false,
"maxLinkNum" : 1000,
"httpsEnabled" : false,
"fullTextEnabled" : false,
"tlsCaPath" : "config_manager/conf/cert/",
"tlsCaFile" : ["ca.pem"],
"tlsCert" : "config_manager/conf/cert/server.pem",
"tlsPk" : "config_manager/conf/cert/server.key.pem",
"tlsCrlPath" : "config_manager/conf/cert/",
"tlsCrlFiles" : ["server_crl.pem"],
"managementTlsCaFile" : ["management_ca.pem"],
"managementTlsCert" : "config_manager/conf/cert/server.pem",
"managementTlsPk" : "config_manager/conf/cert/server.key.pem",
"managementTlsCrlPath" : "config_manager/conf/cert/",
"managementTlsCrlFiles" : ["server_crl.pem"],
"inferMode" : "standard",
"interCommTLSEnabled" : true,
"interCommPort" : 1121,
"interCommTlsCaPath" : "config_manager/conf/cert/",
"interCommTlsCaFiles" : ["ca.pem"],
"interCommTlsCert" : "config_manager/conf/cert/server.pem",
"interCommPk" : "config_manager/conf/cert/server.key.pem",
"interCommTlsCrlPath" : "config_manager/conf/cert/",
"interCommTlsCrlFiles" : ["server_crl.pem"],
"openAiSupport" : "vllm",
"tokenTimeout" : 5,
"e2eTimeout" : 5,
"distDPServerEnabled" : false
},
"BackendConfig" : {
"backendName" : "mindieservice_llm_engine",
"modelInstanceNumber" : 2,
"npuDeviceIds" : [[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7]],
"tokenizerProcessNumber" : 2,
"multiNodesInferEnabled" : true,
"multiNodesInferPort" : 1120,
"interNodeTLSEnabled" : false,
"interNodeTlsCaPath" : "config_manager/conf/",
"interNodeTlsCaFiles" : ["ca.pem"],
"interNodeTlsCrlFilesVec" : ["ca.pem"],
"interNodeTlsCert" : "config_manager/conf/certs/server.pem",
"interNodeTlsPk" : "config_manager/conf/server.key.pem",
"interNodeTlsCrlPath" : "config_manager/conf/certs/",
"interNodeTlsCrlFiles" : ["server_crl.pem"],
"kvPoolConfig" : {"backend":"", "configPath":""},
"ModelDeployConfig" :
{
"maxSeqLen" : 2560,
"maxInputTokenLen" : 2048,
"truncation" : 0,
"LoraModules" : [
{
"name" : "llama_65b",
"path" : "../../config_manager/conf",
"baseModelName" : "llama_65b"
}
],
"ModelConfig" : [
{
"modelInstanceType" : "StandardMock",
"modelName" : "llama_65b",
"modelWeightPath" : "../../config_manager/conf",
"worldSize" : 8,
"cpuMemSize" : 5,
"npuMemSize" : -1,
"backendType" : "atb",
"trustRemoteCode" : false
}
]
},
"ScheduleConfig" :
{
"templateType" : "Standard",
"templateName" : "Standard_LLM",
"cacheBlockSize" : 128,
"maxPrefillBatchSize" : 50,
"maxPrefillTokens" : 8192,
"prefillTimeMsPerReq" : 150,
"prefillPolicyType" : 0,
"bufferResponseEnabled" : false,
"decodeTimeMsPerReq" : 50,
"decodePolicyType" : 0,
"policyType" : 0,
"enableSplit" : true,
"splitType" : true,
"splitStartType" : true,
"splitChunkTokens" : 1,
"splitStartBatchSize" : 100,
"enablePrefixCache" : false,
"maxBatchSize" : 200,
"maxIterTimes" : 512,
"maxPreemptCount" : 0,
"supportSelectBatch" : true,
"maxQueueDelayMicroseconds" : 5000,
"decodeExpectedTime" : 5,
"prefillExpectedTime" : 5,
"stageSelectPolicy" : 1,
"dynamicBatchSizeEnable" : true,
"maxNumPartialPrefills" : 5,
"maxLongPartialPrefills" : 5,
"longPrefillTokenThreshold" : 5
}
}
}