蓝区资源清单

蓝区重点模型权重列表

为了方便开发者贡献测试用例,蓝区服务器上已配置了部分重点模型,路径和配置信息如下表所示:

模型 路径 来源 描述
Qwen3-30B-A3B /data/ci/models/Qwen3-30B-A3B/hf/Qwen3-30B-A3B modelscope 完整权重
Qwen3-8B /data/ci/models/Qwen3-8B/hf/Qwen3-8B modelscope 完整权重
Qwen3-Next /data/ci/models/qwen3_next/hf/Qwen3-Next-80B-A3B-hf 预训练生成 num-layers=4
gpt-oss-20b /data/ci/models/gpt_oss_20b/hf/gpt-oss-20b modelscope 完整权重
Seed-OSS-36B-Instruct /data/ci/models/Seed-OSS-36B-Instruct/hf/Seed-OSS-36B-Instruct modelscope 完整权重,num-layers=4
LongCat-Flash /data/ci/models/longcat-flash-560b/hf/longcat-flash-560b-generated 预训练生成 num-layers=1, num-experts=16, num-zero-experts=8, hidden-size=2048
DeepSeek-V3.2 /data/ci/models/deepseek32/hf/deepseek-v32-generated 预训练生成 num-layers=4, noop-layers=3, first-k-dense-replace=0, num-experts=8, moe-router-topk=2, moe-router-num-groups=4, moe-router-group-topk=1
GLM-5 待补充 待补充
Kimi-Linear-48B 待补充 待补充

测试用例占用蓝区资源登记

为了方便蓝区服务器对磁盘文件的统一管理,用户补充测试用例时,请对测试用例所使用的权重、词表和数据集文件进行登记。

用例名 hf权重和词表路径 mg权重路径 原数据集路径 处理后数据集路径 评估数据集路径 缓存文件路径
test_deepseek3_hf2mcore_tp2ep8etp1 /data/ci/models/deepseek3/hf/deepseek3-hf-L2-mtp / / / / /data/ci/cache/deepseek3-mtp-L2-tp2ep8etp1
test_deepseek3_mcore2hf_tp2ep8etp1 / /data/ci/cache/deepseek3-mtp-L2-tp2ep8etp1 / / / /data/ci/cache/deepseek3-hf-L2-mtp-bk
high_availability_error_dump_ptd /data/ci/models/llama2/hf/llama-high-availability/tokenizer.model / / /data/ci/datasets/processed/dataset-high-availability/llama_text_document / /data/ci/cache/temp-checkpoint-high-availability/
high_availability_uce_error_ptd /data/ci/models/llama2/hf/llama-high-availability/tokenizer.model / / /data/ci/datasets/processed/dataset-high-availability/llama_text_document / /
mamba2_8b_tp4_pp1_cp2_recompute_4k_ptd /data/ci/models/mamba2/hf/mamba2-8b-hf/mamba2_8b.model / / /data/ci/datasets/processed/mamba2_8b_enwiki/mamba_enwiki_text_document / /
qwen3_next_80b_4K_A3_ptd /data/ci/models/qwen3_next/hf/Qwen3-Next-80B-A3B-hf / / /data/ci/datasets/processed/qwen3_next_data/qwen3_next_aplaca_text_document / /
dpo_qwen3_30b_a3b_16K_A3_ptd_tp2pp4.sh /data/ci/models/Qwen3-30B-A3B/hf/Qwen3-30B-A3B / / /data/ci/datasets/origin/pairwise_dataset/output/orca_rlhf/orca_rlhf / /data/ci/cache/qwen3-30b-pp4tp2layer4
test_mamba2_hf2mcore_tp1pp2 /data/ci/models/mamba2/hf/mamba2-2.7b-hf / / / / /data/ci/cache/mamba2_tp1pp2
test_mamba2_mcore2hf_tp1pp2 / /data/ci/cache/mamba2_tp1pp2 / / / /data/ci/cache/mamba2_2.7b_hf
test_llama3_hf2mcore_tp8pp1_tp2d /data/ci/models/llama3/hf/llama-3-8b-hf / / / / /data/ci/cache/llama3-8b-tp8pp1-tp2d
test_llama3_mcore2hf_tp8pp1_tp2d / /data/ci/cache/llama3-8b-tp8pp1-tp2d / / / /data/ci/cache/llama-3-8b-hf-ckpt-param-mg2hf
llama2_tp4cp2pp1_tp2d_tpx2tpy2_ulysses /data/ci/models/llama2/hf/llama-2-7b-hf/ /data/ci/models/llama2/mg/llama2-2dtp-tp4cp2tpx2tpy2_ulyssescp/ / /data/ci/datasets/processed/llama2_7b_pretrain/alpaca_text_document / /
llama2_tp4cp2pp1_tp2d_tpx2tpy2_ringcp /data/ci/models/llama2/hf/llama-2-7b-hf/ /data/ci/models/llama2/mg/llama2-2dtp-tp4cp2pp1tpx2tpy2_ringcp/ / /data/ci/datasets/processed/llama2_7b_pretrain/alpaca_text_document / /
dpo_llama2_tp1_pp1_cyclic_pairwise /data/ci/models/llama2/hf/llama-2-7b-hf /data/ci/models/llama2/mg/llama2-7b_2l_tp1pp1 / /data/ci/datasets/processed/orca/orca_rlhf / /
test_pretrain_datasets_GPTSentencePieceTokenizer /data/ci/models/mamba2/hf/mamba2-2.7b-hf/mt_nlg_plus_multilingual_ja_zh_the_stack_frac_015_256k.model / /data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet /data/ci/cache/process_dataset/test_tokenizer_type/gptsentencepiece / /
test_reasoning_template /data/ci/models/qwen3_next/hf/Qwen3-Next-80B-A3B-hf / /data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet /data/ci/cache/process_dataset/test_template/qwen3_reasoning_template / /data/ci/cache/process_dataset/tmp
tune_llama3_8b_lora_tp1pp8.sh /data/ci/models/llama3/hf/llama-3-8b-hf/ /data/ci/cache/llama-3-8b-tp1pp8 /data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet /data/ci/cache/llama3-8b-dataset/llama3_8b / /data/ci/cache/llama-3-8b-tp1pp8
test_rlhf_qwen25_7b_tp2_pp2 /data/ci/models/qwen25/hf/Qwen2.5-7B /data/ci/models/qwen25/mg/qwen2.5_mcore_tp2_pp2_layers28 /data/ci/datasets/origin/dapo-math-17k.parquet /data/ci/datasets/processed/dapo-math-17k / /
pretrain_qwen3_8b_4k_fsdp2 /data/ci/models/Qwen3-8B/hf/Qwen3-8B / /data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet / / /
tune_gpt_oss_20b_a3b_4k_fsdp2 /data/ci/models/gpt_oss_20b/hf/gpt-oss-20b/ / /data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet / / /
test_longcat_flash_560b_hf2mcore_tp2pp2ep2etp1 /data/ci/models/longcat-flash-560b/hf/longcat-flash-560b-generated/ / / / / /data/ci/cache/longcat-flash-560b-tp2pp1ep2etp1/
test_longcat_flash_560b_mcore2hf_tp2pp2ep2etp1 / /data/ci/cache/longcat-flash-560b-tp2pp1ep2etp1/ / / / /data/ci/cache/longcat-flash-560b-mg2hf/
longcat_flash_560b_tp2pp1ep2etp1 /data/ci/models/longcat-flash-560b/hf/longcat-flash-chat/ /data/ci/models/longcat-flash-560b/mg/longcat-flash-560b-tp2pp1ep2etp1/ / /data/ci/datasets/processed/longcat-flash-dataset/alpaca_text_document/ / /
test_seed_oss_36b_hf2mcore_tp2pp2 /data/ci/models/Seed-OSS-36B-Instruct/hf/Seed-OSS-36B-Instruct/ / / / / /data/ci/cache/Seed-OSS-36B-tp2pp2/
test_seed_oss_36b_mcore2hf_tp2pp2 / /data/ci/cache/Seed-OSS-36B-tp2pp2/ / / / /data/ci/cache/Seed-OSS-36B-tp2pp2-mg2hf/
seed_oss_36b_tp2pp2 /data/ci/models/Seed-OSS-36B-Instruct/hf/Seed-OSS-36B-Instruct/ /data/ci/models/Seed-OSS-36B-Instruct/mg/Seed-OSS-36B-tp2pp2/ / /data/ci/datasets/processed/seed-oss/alpaca_text_document / /