{
"model_id": "qwen3_omni_moe",
"init_from_hf_path": "/home/ci_resource/models/Qwen3-Omni-30B-A3B-Instruct",
"image_encoder": {
"vision_encoder": {
"model_id": "qwen2vit",
"num_layers": 32,
"hidden_size": 1280,
"num_attention_heads": 16,
"freeze": true,
"attn_implementation": "flash_attention_2",
"attn_layout": "BNSD"
},
"vision_projector": {
"model_id": "lnmlp",
"num_layers": 1,
"freeze": true
}
},
"audio_encoder": {
"audio_encoder": {
"model_id": "qwen_omni",
"num_layers": 32,
"hidden_size": 1280,
"num_attention_heads": 20,
"freeze": true,
"attn_implementation": "flash_attention_2",
"attn_layout": "BNSD"
}
},
"text_decoder": {
"model_id": "qwen2_5_omni_thinker",
"num_layers": 28,
"hidden_size": 3584,
"num_attention_heads": 28,
"max_position_embeddings": 128000,
"freeze": false,
"attn_implementation": "flash_attention_2",
"attn_layout": "BNSD",
"activation_offload": true
},
"loss_cfg": {
"compute_mode": "dynamic_chunk",
"chunk_size": 4096
}
}