import json
from pathlib import Path
import pytest
import optix.config.model_config as model_config
from optix.config.model_config import MindieModelConfig, ModelConfig
# Model hyper-parameters that the fixtures in this module serialize to config.json.
MODEL_SHAPE = {
    "hidden_size": 4096,
    "intermediate_size": 14336,
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "num_key_value_heads": 8,
    "vocab_size": 128256,
    "max_position_embeddings": 8192,
    "torch_dtype": "float16",
}

# Attribute values compared against a ModelConfig built from MODEL_SHAPE:
# every shape field except "torch_dtype" is expected unchanged, plus the two
# extra attributes "kvcache_dtype_byte" and "cache_num".
EXPECTED_MODEL_ATTRS = {
    **{key: value for key, value in MODEL_SHAPE.items() if key != "torch_dtype"},
    "kvcache_dtype_byte": 2,
    "cache_num": 2,
}
def write_json(path: Path, payload: dict) -> Path:
    """Dump *payload* as JSON text into *path* and hand the path back.

    Missing parent directories are created first. The file is written as
    UTF-8 and any existing content is replaced.

    Args:
        path: Destination file.
        payload: JSON-serializable mapping to store.

    Returns:
        The *path* argument, unchanged, so callers can return the call directly.
    """
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialized)
    return path
@pytest.fixture
def llama_config_path(tmp_path):
    """Write MODEL_SHAPE to ``<tmp_path>/llama3-8b/config.json`` and return that path."""
    weights_dir = tmp_path / "llama3-8b"
    config_file = weights_dir / "config.json"
    return write_json(config_file, MODEL_SHAPE)
@pytest.fixture
def mindie_config_path(llama_config_path):
    """Write a MindIE-style service config and return the path of the written file.

    The file is placed at ``mindie/config.json`` next to the directory that
    holds the model config produced by ``llama_config_path``; its
    ``modelWeightPath`` entry points at that model directory.
    """
    weight_path = llama_config_path.parent

    server_config = {
        "ipAddress": "127.0.0.1",
        "managementIpAddress": "127.0.0.2",
        "port": 8825,
        "managementPort": 8826,
        "metricsPort": 8827,
        "allowAllZeroIpListening": False,
        "maxLinkNum": 1000,
        "httpsEnabled": False,
        "fullTextEnabled": False,
        "tlsCaPath": "security/ca/",
        "tlsCaFile": ["ca.pem"],
        "tlsCert": "security/certs/server.pem",
        "tlsPk": "security/keys/server.key.pem",
        "tlsPkPwd": "security/pass/key_pwd.txt",
        "tlsCrlPath": "security/certs/",
        "tlsCrlFiles": ["server_crl.pem"],
        "managementTlsCaFile": ["management_ca.pem"],
        "managementTlsCert": "security/certs/management/server.pem",
        "managementTlsPk": "security/keys/management/server.key.pem",
        "managementTlsPkPwd": "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath": "security/management/certs/",
        "managementTlsCrlFiles": ["server_crl.pem"],
        "kmcKsfMaster": "tools/pmt/master/ksfa",
        "kmcKsfStandby": "tools/pmt/standby/ksfb",
        "inferMode": "standard",
        "interCommTLSEnabled": False,
        "interCommPort": 1121,
        "interCommTlsCaPath": "security/grpc/ca/",
        "interCommTlsCaFiles": ["ca.pem"],
        "interCommTlsCert": "security/grpc/certs/server.pem",
        "interCommPk": "security/grpc/keys/server.key.pem",
        "interCommPkPwd": "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath": "security/grpc/certs/",
        "interCommTlsCrlFiles": ["server_crl.pem"],
        "openAiSupport": "vllm",
        "tokenTimeout": 600,
        "e2eTimeout": 600,
        "distDPServerEnabled": False,
    }

    # Single model entry whose weights directory is the llama fixture's folder.
    model_entry = {
        "modelInstanceType": "Standard",
        "modelName": "llama3-8b",
        "modelWeightPath": str(weight_path),
        "worldSize": 1,
        "cpuMemSize": 5,
        "npuMemSize": -1,
        "backendType": "atb",
        "trustRemoteCode": True,
    }
    model_deploy_config = {
        "maxSeqLen": 2560,
        "maxInputTokenLen": 2048,
        "truncation": False,
        "ModelConfig": [model_entry],
    }

    schedule_config = {
        "templateType": "Standard",
        "templateName": "Standard_LLM",
        "cacheBlockSize": 128,
        "maxPrefillBatchSize": 340,
        "maxPrefillTokens": 8192,
        "prefillTimeMsPerReq": 714,
        "prefillPolicyType": 0,
        "decodeTimeMsPerReq": 362,
        "decodePolicyType": 3,
        "maxBatchSize": 650,
        "maxIterTimes": 512,
        "maxPreemptCount": 121,
        "supportSelectBatch": False,
        "maxQueueDelayMicroseconds": 200499,
    }

    backend_config = {
        "backendName": "mindieservice_llm_engine",
        "modelInstanceNumber": 1,
        "npuDeviceIds": [[3]],
        "tokenizerProcessNumber": 8,
        "multiNodesInferEnabled": False,
        "multiNodesInferPort": 1120,
        "interNodeTLSEnabled": False,
        "interNodeTlsCaPath": "security/grpc/ca/",
        "interNodeTlsCaFiles": ["ca.pem"],
        "interNodeTlsCert": "security/grpc/certs/server.pem",
        "interNodeTlsPk": "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd": "security/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath": "security/grpc/certs/",
        "interNodeTlsCrlFiles": ["server_crl.pem"],
        "interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
        "ModelDeployConfig": model_deploy_config,
        "ScheduleConfig": schedule_config,
    }

    payload = {
        "Version": "1.0.0",
        "ServerConfig": server_config,
        "BackendConfig": backend_config,
    }
    return write_json(weight_path.parent / "mindie" / "config.json", payload)
@pytest.mark.parametrize(
    ("factory", "path_name", "error_type"),
    [
        (ModelConfig, "missing_model.json", FileNotFoundError),
        (MindieModelConfig, "missing_mindie.json", FileNotFoundError),
    ],
)
def test_config_path_must_exist(tmp_path, factory, path_name, error_type):
    """Building a config from a path that was never created must raise *error_type*."""
    absent_path = tmp_path / path_name
    with pytest.raises(error_type):
        factory(absent_path)
@pytest.mark.parametrize(
    ("factory", "file_name", "content"),
    [
        (ModelConfig, "invalid_model.json", "{invalid_json}"),
        (MindieModelConfig, "invalid_mindie.json", "invalid json content"),
    ],
)
def test_invalid_json_is_rejected(tmp_path, factory, file_name, content):
    """A file that exists but does not contain valid JSON must raise ValueError."""
    broken_file = tmp_path / file_name
    broken_file.write_text(content, encoding="utf-8")

    with pytest.raises(ValueError):
        factory(broken_file)
def test_model_config_loads_core_shape(llama_config_path):
    """Every attribute listed in EXPECTED_MODEL_ATTRS must carry the expected value."""
    loaded = ModelConfig(llama_config_path)

    # Collect the attributes into a dict so a mismatch is reported as one diff.
    actual_attrs = {}
    for attr_name in EXPECTED_MODEL_ATTRS:
        actual_attrs[attr_name] = getattr(loaded, attr_name)

    assert actual_attrs == EXPECTED_MODEL_ATTRS
def test_model_config_rejects_missing_required_key(tmp_path):
    """A model config lacking a required key must be rejected with ValueError."""
    # Compared with MODEL_SHAPE this payload omits "max_position_embeddings"
    # and "torch_dtype"; a separate test in this module loads a config without
    # "torch_dtype" successfully.
    partial_shape = {
        "hidden_size": 768,
        "intermediate_size": 3072,
        "num_attention_heads": 12,
        "num_hidden_layers": 12,
        "num_key_value_heads": 12,
        "vocab_size": 30522,
    }
    incomplete_path = write_json(tmp_path / "missing_param_config.json", partial_shape)

    with pytest.raises(ValueError):
        ModelConfig(incomplete_path)
def test_one_token_cache_uses_kv_shape(llama_config_path):
    """The per-token cache size for the llama fixture must be 131072."""
    # NOTE(review): 131072 == 2 * 32 * 8 * (4096 // 32) * 2, i.e. presumably
    # cache_num * layers * kv_heads * head_dim * dtype_bytes - confirm against
    # ModelConfig.get_one_token_cache.
    config = ModelConfig(llama_config_path)
    one_token_cache = config.get_one_token_cache()
    assert one_token_cache == 131072
def fake_npu_memory(device_id: int = 0):
    """Stand-in patched over ``model_config.get_npu_total_memory`` in this module.

    *device_id* is accepted but ignored; the same ``(65535, 3)`` pair is always
    returned. Other tests here pass the same numbers explicitly as
    ``npu_total_mem`` and ``memory_usage_rate``.
    """
    total_memory = 65535
    usage_rate = 3
    return total_memory, usage_rate
def fake_settings():
    """Stand-in patched over ``model_config.get_settings`` in this module.

    Returns a fresh object whose only custom attribute is
    ``mem_coefficient == 0.8``.
    """

    class SettingsStub:
        mem_coefficient = 0.8

    return SettingsStub()
def test_mindie_config_reads_deploy_and_schedule_values(mindie_config_path, monkeypatch):
    """Values from the MindIE fixture file must surface as config attributes."""
    # Replace the hardware query and the settings lookup with the module's
    # stubs; no explicit memory arguments are passed to the constructor here.
    monkeypatch.setattr(model_config, "get_settings", fake_settings)
    monkeypatch.setattr(model_config, "get_npu_total_memory", fake_npu_memory)

    mindie = MindieModelConfig(mindie_config_path)

    assert mindie.npu_device_ids == [[3]]
    assert mindie.cache_block_size == 128
    assert mindie.max_input_length == 8192
    assert mindie.tp_size == 1
def test_mindie_config_calculates_available_kv_cache_memory(mindie_config_path, monkeypatch):
    """``get_npu_mem_size`` must match the hand-computed KV-cache budget.

    The expected value is compared with ``pytest.approx`` rather than ``==``:
    it is the result of a chain of floating-point divisions and subtractions,
    so an exact comparison would fail on any harmless reordering of the same
    arithmetic inside the implementation.
    """
    monkeypatch.setattr(model_config, "get_settings", fake_settings)
    config = MindieModelConfig(mindie_config_path, npu_total_mem=65535, memory_usage_rate=3)

    result = config.get_npu_mem_size()

    # Free share of the 65535 total (3 % in use), scaled to 1/1024 units and by
    # the stubbed mem_coefficient of 0.8.
    usable = 65535 * (100 - 3) / 100 / 1024 * 0.8
    # NOTE(review): the two subtracted terms are presumably the model weight
    # size and the peak activation size - confirm against the implementation.
    second_term = 14356.015625 / 1024
    third_term = 4798283776 / 1024 / 1024 / 1024
    expected = usable - second_term - third_term

    assert result == pytest.approx(expected)
def test_model_config_repr(llama_config_path):
    """``repr`` of a ModelConfig must mention the class name and the hidden size."""
    rendered = repr(ModelConfig(llama_config_path))

    for fragment in ("ModelConfig", "hidden_size=4096"):
        assert fragment in rendered
def test_model_config_get_peak_activations_size(llama_config_path):
    """``get_peak_activations_size`` must return a positive number."""
    model = ModelConfig(llama_config_path)

    peak = model.get_peak_activations_size(max_prefill_token=2048, sequence_length=4096)

    assert peak > 0
    assert isinstance(peak, (int, float))
def test_model_config_calculate_model_weights_size(llama_config_path):
    """A loaded ModelConfig must expose a positive ``memory_mb``."""
    weights_mb = ModelConfig(llama_config_path).memory_mb
    assert weights_mb > 0
@pytest.mark.parametrize(
    ("dtype_str", "expected"),
    [
        ("float16", 2),
        ("bfloat16", 2),
        ("fp16", 2),
        ("int8", 1),
        ("float32", 4),
        ("fp32", 4),
        ("unknown_type", 2),
    ],
)
def test_kvcache_dtype_byte_mapping(tmp_path, dtype_str, expected):
    """Each ``torch_dtype`` string must yield the listed ``kvcache_dtype_byte``.

    The last case checks that an unrecognised dtype string yields 2.
    """
    shape = dict(MODEL_SHAPE, torch_dtype=dtype_str)
    config_file = write_json(tmp_path / "config.json", shape)

    loaded = ModelConfig(config_file)

    assert loaded.kvcache_dtype_byte == expected
def test_kvcache_dtype_byte_no_dtype_field(tmp_path):
    """Without a ``torch_dtype`` entry, ``kvcache_dtype_byte`` must be 2."""
    shape = dict(MODEL_SHAPE)
    shape.pop("torch_dtype", None)
    config_file = write_json(tmp_path / "config.json", shape)

    loaded = ModelConfig(config_file)

    assert loaded.kvcache_dtype_byte == 2
def test_one_token_cache_zero_attention_heads(tmp_path):
    """With ``num_attention_heads`` set to 0, the one-token cache size must be 0."""
    shape = dict(MODEL_SHAPE, num_attention_heads=0)
    config_file = write_json(tmp_path / "config.json", shape)

    loaded = ModelConfig(config_file)

    assert loaded.get_one_token_cache() == 0
def test_mindie_model_config_get_max_batch_size_bound(mindie_config_path, monkeypatch):
    """``get_max_batch_size_bound`` must return an ordered, non-negative pair."""
    monkeypatch.setattr(model_config, "get_settings", fake_settings)
    mindie = MindieModelConfig(mindie_config_path, npu_total_mem=65535, memory_usage_rate=3)

    lower, upper = mindie.get_max_batch_size_bound()

    assert lower >= 0
    assert upper >= lower
def test_mindie_model_config_npumemsize_positive(tmp_path, monkeypatch):
    """A positive ``npuMemSize`` (instead of -1) must show up as ``mem_for_kv_cache_gb``."""
    monkeypatch.setattr(model_config, "get_settings", fake_settings)

    # Model directory holding the config.json that modelWeightPath refers to.
    model_path = tmp_path / "model"
    model_path.mkdir()
    write_json(model_path / "config.json", MODEL_SHAPE)

    schedule_config = {
        "cacheBlockSize": 128,
        "maxPrefillTokens": 8192,
        "maxIterTimes": 512,
    }
    model_entry = {"modelWeightPath": str(model_path), "npuMemSize": 10, "tp": 1}
    mindie_cfg = {
        "BackendConfig": {
            "npuDeviceIds": [[0]],
            "ScheduleConfig": schedule_config,
            "ModelDeployConfig": {"ModelConfig": [model_entry]},
        }
    }
    cfg_path = write_json(tmp_path / "mindie_cfg.json", mindie_cfg)

    mindie = MindieModelConfig(cfg_path, npu_total_mem=65535, memory_usage_rate=3)

    assert mindie.mem_for_kv_cache_gb == 10
def test_model_config_generic_exception_raises_ioerror(tmp_path, monkeypatch):
    """ModelConfig must raise IOError when ``open_file`` itself fails.

    ``open_file`` is replaced through the ``monkeypatch`` fixture, as the
    other tests in this module do, instead of a function-local
    ``unittest.mock.patch`` import that left the requested fixture unused.
    """
    config_path = tmp_path / "config.json"
    config_path.write_text('{"valid": "json"}', encoding="utf-8")

    def deny_access(*args, **kwargs):
        # Fail regardless of how open_file is invoked.
        raise PermissionError("no access")

    monkeypatch.setattr(model_config, "open_file", deny_access)

    # NOTE(review): PermissionError is already a subclass of OSError, and
    # IOError is an alias of OSError, so this passes whether ModelConfig wraps
    # the error or lets it propagate. Consider a non-OSError side effect if
    # the wrapping itself is what should be verified.
    with pytest.raises(IOError):
        ModelConfig(config_path)
def test_mindie_model_config_generic_exception_raises_ioerror(tmp_path, monkeypatch):
    """MindieModelConfig must raise IOError when ``open_file`` itself fails.

    ``open_file`` is replaced through the ``monkeypatch`` fixture, as the
    other tests in this module do, instead of a function-local
    ``unittest.mock.patch`` import that left the requested fixture unused.
    """
    config_path = tmp_path / "config.json"
    config_path.write_text('{"valid": "json"}', encoding="utf-8")

    def deny_access(*args, **kwargs):
        # Fail regardless of how open_file is invoked.
        raise PermissionError("no access")

    monkeypatch.setattr(model_config, "open_file", deny_access)

    # NOTE(review): PermissionError is already a subclass of OSError, and
    # IOError is an alias of OSError, so this passes whether MindieModelConfig
    # wraps the error or lets it propagate. Consider a non-OSError side effect
    # if the wrapping itself is what should be verified.
    with pytest.raises(IOError):
        MindieModelConfig(config_path)
def test_mindie_model_get_max_batch_size_zero_kv_block(mindie_config_path, monkeypatch):
    """``get_max_batch_size_bound`` must raise ValueError when one_kv_block_size is 0."""
    monkeypatch.setattr(model_config, "get_settings", fake_settings)
    mindie = MindieModelConfig(mindie_config_path, npu_total_mem=65535, memory_usage_rate=3)

    # Zero the head count on the nested model config after construction; the
    # test expects this to lead to the "one_kv_block_size is 0" error below.
    mindie.model_config.num_attention_heads = 0

    with pytest.raises(ValueError, match="one_kv_block_size is 0"):
        mindie.get_max_batch_size_bound()