{
"test_llama2_mcore_prompt_greedy_search": [
{
"param": {
"tensor-model-parallel-size": 8,
"pipeline-model-parallel-size": 1,
"use-mcore-models": null,
"use-kv-cache": null,
"use-flash-attn": null,
"use-fused-swiglu": null,
"use-fused-rmsnorm": null,
"use-fused-rotary-pos-emb": null,
"num-layers": 32,
"hidden-size": 4096,
"ffn-hidden-size": 11008,
"position-embedding-type": "rope",
"seq-length": 4096,
"max-new-tokens": 30,
"micro-batch-size": 1,
"global-batch-size": 1,
"num-attention-heads": 32,
"max-position-embeddings": 4096,
"make-vocab-size-divisible-by": 1,
"swiglu": null,
"normalization": "RMSNorm",
"load":"/data/ci/models/llama2/mg/llama2-tp8pp1",
"tokenizer-type": "PretrainedFromHF",
"tokenizer-name-or-path":"/data/ci/models/llama2/hf/llama-2-7b-hf",
"tokenizer-model": "/data/ci/models/llama2/hf/llama-2-7b-hftokenizer.model",
"disable-bias-linear": null,
"attention-softmax-in-fp32": null,
"untie-embeddings-and-output-weights": null,
"no-masked-softmax-fusion": null,
"no-load-optim": null,
"no-load-rng": null,
"fp16": null,
"task":"greedy",
"npu-deterministic": null,
"prompt-type": "llama2",
"transformer-impl": "local"
}
}
],
"test_deepseek2_mcore_greedy_search": [
{
"param": {
"use-mcore-models": null,
"spec": ["mindspeed_llm.tasks.models.spec.deepseek_spec", "layer_spec"],
"tensor-model-parallel-size": 1,
"pipeline-model-parallel-size": 1,
"expert-model-parallel-size": 8,
"use-flash-attn": null,
"num-layers": 2,
"hidden-size": 5120,
"ffn-hidden-size": 12288,
"seq-length": 8192,
"max-new-tokens": 30,
"micro-batch-size": 1,
"global-batch-size": 16,
"num-attention-heads": 128,
"max-position-embeddings": 163840,
"position-embedding-type": "rope",
"swiglu": null,
"load": "/data/ci/models/deepseek2/mg/deepseek2-l2-tp1-pp1-ep8-new",
"tokenizer-type": "PretrainedFromHF",
"tokenizer-name-or-path": "/data/ci/models/deepseek2/hf/deepseek2_hf/",
"bf16": null,
"normalization": "RMSNorm",
"untie-embeddings-and-output-weights": null,
"disable-bias-linear": null,
"attention-softmax-in-fp32": null,
"no-load-optim": null,
"no-load-rng": null,
"no-masked-softmax-fusion": null,
"no-gradient-accumulation-fusion": null,
"task": "greedy",
"npu-deterministic": null,
"make-vocab-size-divisible-by": 1,
"shape-order": "BNSD",
"output-layer-slice-num": 10,
"use-fused-swiglu": null,
"use-fused-rmsnorm": null,
"use-fused-rotary-pos-emb": null,
"use-rotary-position-embeddings": null,
"vocab-size": 102400,
"padded-vocab-size": 102400,
"rotary-base": 10000,
"norm-epsilon": 1e-6,
"multi-latent-attention": null,
"qk-pos-emb-head-dim": 64,
"qk-head-dim": 128,
"q-lora-rank": 1536,
"kv-lora-rank": 512,
"v-head-dim": 128,
"qk-layernorm": null,
"moe-grouped-gemm": null,
"moe-permutation-async-comm": null,
"moe-token-dispatcher-type": "alltoall_seq",
"first-k-dense-replace": 1,
"moe-layer-freq": 1,
"n-shared-experts": 2,
"num-experts": 160,
"moe-router-topk": 6,
"moe-ffn-hidden-size": 1536,
"moe-router-load-balancing-type": "group_limited_greedy",
"moe-router-group-topk": 3,
"moe-router-num-groups": 8,
"moe-aux-loss-coeff": 0.003,
"moe-device-level-aux-loss-coeff": 0.05,
"moe-comm-aux-loss-coeff": 0.02,
"moe-router-topk-scaling-factor": 16.0,
"seq-aux": null,
"beta-fast": 32,
"beta-slow": 1,
"rope-scaling-factor": 40,
"rope-scaling-mscale": 1.0,
"rope-scaling-mscale-all-dim": 1.0,
"rope-scaling-original-max-position-embeddings": 4096,
"rope-scaling-type": "yarn",
"transformer-impl": "local"
}
}
],
"test_llama3_mcore_greedy_search_with_tp2pp4sp": [
{
"param": {
"tensor-model-parallel-size": 2,
"pipeline-model-parallel-size": 4,
"sequence-parallel": null,
"use-mcore-models": null,
"use-fused-swiglu": null,
"use-fused-rmsnorm": null,
"use-fused-rotary-pos-emb": null,
"num-layers": 32,
"hidden-size": 4096,
"ffn-hidden-size": 14336,
"position-embedding-type": "rope",
"rotary-base": 500000,
"seq-length": 8192,
"max-position-embeddings": 8192,
"max-new-tokens": 50,
"micro-batch-size": 1,
"num-attention-heads": 32,
"num-query-groups": 8,
"group-query-attention": null,
"make-vocab-size-divisible-by": 16032,
"swiglu": null,
"normalization": "RMSNorm",
"norm-epsilon": 1e-5,
"hidden-dropout": 0,
"attention-dropout": 0,
"load": "/data/ci/models/llama3/mg/llama3-8b-mcore-tp2-pp4",
"tokenizer-type": "PretrainedFromHF",
"tokenizer-name-or-path":"/data/ci/models/llama3/hf/llama-3-8b-hf",
"tokenizer-model": "/data/ci/models/llama3/hf/llama-3-8b-hf/tokenizer.model",
"disable-bias-linear": null,
"attention-softmax-in-fp32": null,
"untie-embeddings-and-output-weights": null,
"no-masked-softmax-fusion": null,
"no-load-optim": null,
"no-load-rng": null,
"bf16": null,
"task":"greedy",
"seed": 42,
"npu-deterministic": null,
"transformer-impl": "local"
}
}
]
}