{
"test_llama2_mcore_agieval_evaluate": [
  {
    "param": {
      "task-data-path": "/data/ci/datasets/eval_data/agieval",
      "task": "agieval",
      "tensor-model-parallel-size": 8,
      "pipeline-model-parallel-size": 1,
      "num-layers": 32,
      "use-mcore-models": null,
      "hidden-size": 4096,
      "ffn-hidden-size": 11008,
      "num-attention-heads": 32,
      "seq-length": 4096,
      "max-new-tokens": 1,
      "evaluation-batch-size": 1,
      "max-position-embeddings": 4096,
      "make-vocab-size-divisible-by": 1,
      "micro-batch-size": 1,
      "normalization": "RMSNorm",
      "position-embedding-type": "rope",
      "swiglu": null,
      "tokenizer-not-use-fast": null,
      "untie-embeddings-and-output-weights": null,
      "disable-bias-linear": null,
      "no-masked-softmax-fusion": null,
      "fp16": null,
      "no-load-rng": null,
      "no-load-optim": null,
      "load": "/data/ci/models/llama2/mg/llama2-tp8pp1",
      "tokenizer-type": "PretrainedFromHF",
      "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
      "npu-deterministic": null,
      "transformer-impl": "local"
    }
  }
],
"test_llama2_mcore_bbh_evaluate": [
  {
    "param": {
      "task-data-path": "/data/ci/datasets/eval_data/bbh/test",
      "task": "bbh",
      "tensor-model-parallel-size": 8,
      "pipeline-model-parallel-size": 1,
      "use-mcore-models": null,
      "num-layers": 32,
      "hidden-size": 4096,
      "ffn-hidden-size": 11008,
      "num-attention-heads": 32,
      "seq-length": 4096,
      "max-new-tokens": 32,
      "evaluation-batch-size": 4,
      "max-position-embeddings": 4096,
      "make-vocab-size-divisible-by": 1,
      "micro-batch-size": 1,
      "normalization": "RMSNorm",
      "position-embedding-type": "rope",
      "swiglu": null,
      "tokenizer-not-use-fast": null,
      "untie-embeddings-and-output-weights": null,
      "disable-bias-linear": null,
      "no-masked-softmax-fusion": null,
      "fp16": null,
      "no-load-rng": null,
      "no-load-optim": null,
      "load": "/data/ci/models/llama2/mg/llama2-tp8pp1",
      "tokenizer-type": "PretrainedFromHF",
      "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
      "npu-deterministic": null,
      "transformer-impl": "local"
    }
  }
],
"test_qwen2_mcore_mmlu_evaluate": [
  {
    "param": {
      "use-mcore-models": null,
      "use-kv-cache": null,
      "task-data-path": "/data/ci/datasets/eval_data/mmlu/data/test",
      "task": "mmlu",
      "tensor-model-parallel-size": 1,
      "pipeline-model-parallel-size": 1,
      "micro-batch-size": 2,
      "seq-length": 4096,
      "max-position-embeddings": 4096,
      "tokenizer-type": "PretrainedFromHF",
      "tokenizer-name-or-path": "/data/ci/models/qwen2-1.5b/hf/Qwen2-1.5B",
      "max-new-tokens": 1,
      "make-vocab-size-divisible-by": 1,
      "padded-vocab-size": 151936,
      "rotary-base": 1000000,
      "num-layers": 28,
      "hidden-size": 1536,
      "ffn-hidden-size": 8960,
      "num-attention-heads": 12,
      "group-query-attention": null,
      "num-query-groups": 2,
      "add-qkv-bias": null,
      "disable-bias-linear": null,
      "swiglu": null,
      "position-embedding-type": "rope",
      "load": "/data/ci/models/qwen2-1.5b/mg/qwen2-1.5b-hf-v0.1-tp1-pp1/",
      "normalization": "RMSNorm",
      "norm-epsilon": 1e-06,
      "tokenizer-not-use-fast": null,
      "exit-on-missing-checkpoint": null,
      "no-load-rng": null,
      "no-load-optim": null,
      "no-gradient-accumulation-fusion": null,
      "attention-softmax-in-fp32": null,
      "seed": 42,
      "bf16": null,
      "no-chat-template": null,
      "transformer-impl": "local"
    }
  }
]
}