; Project links: the source repository and the FAQ page in the Chinese docs tree.
; NOTE(review): how these URLs are surfaced (e.g. in error messages) is not
; visible from this file - confirm against the code that reads [URL].
[URL]
repository_url = https://gitcode.com/Ascend/msmodelslim
question_and_answer_url = https://gitcode.com/Ascend/msmodelslim/blob/master/docs/zh/appendix/faq.md
; Maps an adapter id (key) to the comma-separated list of model names it covers.
; The same adapter ids are used as keys in [ModelAdapterEntryPoints] and
; [ModelAdapterDependencies].
; Keep every list on a single line: a wrapped line that starts in column 0 is
; not a continuation of the previous value (Python configparser only joins
; continuation lines that are indented, and reports a parsing error otherwise).
; NOTE(review): presumably the consumer splits on "," and strips whitespace -
; confirm against the loader before adding names that contain commas.
[ModelAdapter]
default = default
deepseek_v3 = DeepSeek-V3, DeepSeek-V3-0324, DeepSeek-R1, DeepSeek-R1-0528, DeepSeek-V3.1
deepseek_v3_2 = DeepSeek-V3.2-Exp, DeepSeek-V3.2
glm_5 = GLM-5
qwen1_5 = Qwen1.5-110B
qwen2 = Qwen2-7B, Qwen2-7B-Instruct, Qwen2-72B
qwen2_5 = Qwen2.5-7B-Instruct, Qwen2.5-32B-Instruct, Qwen2.5-72B-Instruct, Qwen2.5-Coder-7B-Instruct, DeepSeek-R1-Distill-Qwen-1.5B, DeepSeek-R1-Distill-Qwen-7B
qwen3 = Qwen3-8B, Qwen3-14B, Qwen3-32B
qwen3_moe = Qwen3-30B, Qwen3-235B, Qwen3-Coder-480B-A35B
qwq = Qwen-QwQ-32B, QwQ-32B
wan2_1 = Wan2_1, Wan2.1
qwen3_next = Qwen3-Next-80B-A3B-Instruct
wan2_2 = Wan2_2, Wan2.2, Wan2.2-I2V-A14B, Wan2.2-T2V-A14B, Wan2.2-TI2V-5B
hunyuan_video = HunyuanVideo, hunyuan_video, hunyuan-video, hunyuanvideo
qwen3_vl = Qwen3-VL-4B-Instruct, Qwen3-VL-32B-Instruct
qwen3_vl_moe = Qwen3-VL-30B-A3B, Qwen3-VL-235B-A22B
qwen3_omni_moe = Qwen3-Omni-30B-A3B-Thinking, Qwen3-Omni-30B-A3B-Instruct
kimi_k2 = Kimi-K2-Instruct-0905, Kimi-K2-Thinking
flux1 = FLUX.1-dev
glm4_moe = GLM-4.5, GLM-4.6, GLM-4.7
qwen2_5_vl = Qwen2.5-VL-7B-Instruct, Qwen2.5-VL-72B-Instruct, Qwen2.5-VL-32B-Instruct
qwen3_5_moe = Qwen3.5-397B-A17B, Qwen3.5-27B, Qwen3.5-122B-A10B, Qwen3.5-35B-A3B
qwen2_5_omni_thinker = Qwen2.5-Omni-7B
glm4_6v = GLM-4.6V
qwen_image_edit = Qwen-Image-Edit-2509
; Maps each adapter id from [ModelAdapter] to the class that implements it,
; written as "dotted.module.path:ClassName".
; Every key here has a matching key in [ModelAdapter]; keep the two sections
; in sync when adding or removing an adapter.
; NOTE(review): presumably the class is imported lazily from this reference -
; confirm against the loader.
[ModelAdapterEntryPoints]
default = msmodelslim.model.default.model_adapter:DefaultModelAdapter
deepseek_v3 = msmodelslim.model.deepseek_v3.model_adapter:DeepSeekV3ModelAdapter
deepseek_v3_2 = msmodelslim.model.deepseek_v3_2.model_adapter:DeepSeekV32ModelAdapter
glm_5 = msmodelslim.model.glm_5.model_adapter:GLM5ModelAdapter
qwen1_5 = msmodelslim.model.qwen1_5.model_adapter:Qwen15ModelAdapter
qwen2 = msmodelslim.model.qwen2.model_adapter:Qwen2ModelAdapter
qwen2_5 = msmodelslim.model.qwen2_5.model_adapter:Qwen25ModelAdapter
qwen3 = msmodelslim.model.qwen3.model_adapter:Qwen3ModelAdapter
qwen3_moe = msmodelslim.model.qwen3_moe.model_adapter:Qwen3MoeModelAdapter
qwq = msmodelslim.model.qwq.model_adapter:QwqModelAdapter
wan2_1 = msmodelslim.model.wan2_1.model_adapter:Wan2Point1Adapter
qwen3_next = msmodelslim.model.qwen3_next.model_adapter:Qwen3NextModelAdapter
wan2_2 = msmodelslim.model.wan2_2.model_adapter:Wan2Point2Adapter
hunyuan_video = msmodelslim.model.hunyuan_video.model_adapter:HunyuanVideoModelAdapter
qwen3_vl = msmodelslim.model.qwen3_vl.model_adapter:Qwen3VLModelAdapter
qwen3_vl_moe = msmodelslim.model.qwen3_vl_moe.model_adapter:Qwen3VLMoeModelAdapter
qwen3_omni_moe = msmodelslim.model.qwen3_omni_moe.model_adapter:Qwen3OmniMoeThinkerModelAdapter
kimi_k2 = msmodelslim.model.kimi_k2.model_adapter:KimiK2ModelAdapter
flux1 = msmodelslim.model.flux1.model_adapter:FLUX1ModelAdapter
glm4_moe = msmodelslim.model.glm4_moe.model_adapter:GLM4MoeModelAdapter
qwen2_5_vl = msmodelslim.model.qwen2_5_vl.model_adapter:Qwen25VLModelAdapter
qwen3_5_moe = msmodelslim.model.qwen3_5_moe.model_adapter:Qwen3_5ModelAdapter
qwen2_5_omni_thinker = msmodelslim.model.qwen2_5_omni_thinker.model_adapter:Qwen25OmniThinkerModelAdapter
glm4_6v = msmodelslim.model.glm4_6v.model_adapter:GLM4_6VModelAdapter
qwen_image_edit = msmodelslim.model.qwen_image_edit.model_adapter:QwenImageEditModelAdapter
; Per-adapter package requirements. Each value is a JSON-style object mapping a
; package name to a version specifier ("==", ">=", or a comma-joined range).
; The double quotes are part of the value and must be kept.
; NOTE(review): presumably the value is decoded as JSON and the specifier is
; checked against the installed package - confirm against the loader.
; NOTE(review): adapters without an entry here (default, qwen1_5, qwen2,
; qwen2_5, qwq, wan2_1, wan2_2, hunyuan_video, qwen3_vl_moe, qwen_image_edit)
; declare no requirement. qwen3_vl is pinned but qwen3_vl_moe is not - confirm
; that this is intentional.
[ModelAdapterDependencies]
deepseek_v3 = {"transformers": "==4.48.2"}
deepseek_v3_2 = {"transformers": "==4.48.2"}
glm4_moe = {"transformers": "==4.57.3"}
glm_5 = {"transformers": ">=5.0.0"}
qwen2_5_vl = {"transformers": "==4.49.0"}
qwen3 = {"transformers": ">=4.51.0"}
qwen3_moe = {"transformers": ">=4.51.0"}
qwen3_omni_moe = {"transformers": "==4.57.3"}
qwen3_next = {"transformers": ">=4.57.0"}
qwen3_vl = {"transformers": ">=4.57.1"}
kimi_k2 = {"transformers": "==4.48.2"}
qwen3_5_moe = {"transformers": "==5.2.0"}
flux1 = {"diffusers": ">=0.33.0,<=0.33.1"}
qwen2_5_omni_thinker = {"transformers": "==4.57.3"}
glm4_6v = {"transformers": "==5.0.0rc0"}
; Tuning-strategy plugins: plugin name -> "dotted.module.path:get_plugin".
; NOTE(review): get_plugin is presumably a factory callable resolved at load
; time - confirm against the plugin loader.
[Plugin:tuning_strategy]
standing_high = msmodelslim.core.tune_strategy.standing_high.strategy:get_plugin
standing_high_with_experience = msmodelslim.core.tune_strategy.standing_high_with_experience.strategy:get_plugin
; Evaluation plugins: plugin name -> "dotted.module.path:get_plugin".
[Plugin:evaluation]
service_oriented = msmodelslim.infra.service_oriented_evaluate_service:get_plugin
; Quantization-service plugins: plugin name -> "dotted.module.path:get_plugin".
; The two multimodal_* entries point at the multimodal_sd_v1 and
; multimodal_vlm_v1 packages respectively.
[Plugin:quant_service]
modelslim_v0 = msmodelslim.core.quant_service.modelslim_v0.quant_service:get_plugin
modelslim_v1 = msmodelslim.core.quant_service.modelslim_v1.quant_service:get_plugin
multimodal_sd_modelslim_v1 = msmodelslim.core.quant_service.multimodal_sd_v1.quant_service:get_plugin
multimodal_vlm_modelslim_v1 = msmodelslim.core.quant_service.multimodal_vlm_v1.quant_service:get_plugin
; Pre-check rule plugins: rule name -> "dotted.module.path:get_plugin".
; Both rules live under msmodelslim.infra.evaluation.precheck.
[Plugin:precheck_rule]
garbled_text = msmodelslim.infra.evaluation.precheck.garbled_text_rule:get_plugin
expected_answer = msmodelslim.infra.evaluation.precheck.expected_answer_rule:get_plugin