{
    "model_id": "qwen3_omni_moe",
    "init_from_hf_path": "/home/ci_resource/models/Qwen3-Omni-30B-A3B-Instruct",
    "image_encoder": {
        "vision_encoder": {
            "model_id": "qwen2vit",
            "num_layers": 32,
            "hidden_size": 1280,
            "num_attention_heads": 16,
            "freeze": true,
            "attn_implementation": "flash_attention_2",
            "attn_layout": "BNSD"
        },
        "vision_projector": {
            "model_id": "lnmlp",
            "num_layers": 1,
            "freeze": true
        }
    },
    "audio_encoder": {
        "audio_encoder": {
            "model_id": "qwen_omni",
            "num_layers": 32,
            "hidden_size": 1280,
            "num_attention_heads": 20,
            "freeze": true,
            "attn_implementation": "flash_attention_2",
            "attn_layout": "BNSD"
        }
    },
    "text_decoder": {
        "model_id": "qwen2_5_omni_thinker",
        "num_layers": 28,
        "hidden_size": 3584,
        "num_attention_heads": 28,
        "max_position_embeddings": 128000,
        "freeze": false,
        "attn_implementation": "flash_attention_2",
        "attn_layout": "BNSD",
        "activation_offload": true
    },
    "loss_cfg": {
        "compute_mode": "dynamic_chunk",
        "chunk_size": 4096
    }
}