{
    "model_id": "InternVL2.5",
    "pre_process": true,
    "post_process": true,
    "add_text_encoder": false,
    "img_embedding_idx": 1,
    "downsample_ratio": 0.5,
    "select_layer": -1,
    "ps_version": "v2",
    "add_rmsnorm_offset": false,
    "img_context_token_id": 151667,
    "text_decoder": {
        "model_id": "internllm",
        "num_layers": 36,
        "pipeline_num_layers": [36],
        "hidden_size": 2048,
        "ffn_hidden_size": 11008,
        "num_attention_heads": 16,
        "num_key_value_heads": 2,
        "max_position_embeddings": 32768,
        "vocab_size": 151674,
        "rotary_percent": 1.0,
        "rotary_base": 1000000,
        "untie_embeddings_and_output_weights": true,
        "disable_bias_linear": true,
        "attention_dropout": 0.0,
        "init_method_std": 0.01,
        "hidden_dropout": 0.0,
        "position_embedding_type": "rope",
        "normalization": "RMSNorm",
        "activation_func": "silu",
        "attention_softmax_in_fp32": true,
        "use_fused_rotary_pos_emb": false,
        "params_dtype": "bf16",
        "bf16": true,
        "fp16_lm_cross_entropy": false,
        "parallel_output": true,
        "group_query_attention": true,
        "num_query_groups": 2,
        "rope_scaling": null,
        "gated_linear_unit": true,
        "layernorm_epsilon": 1e-06,
        "add_bias_linear": false,
        "add_qkv_bias": true
    },
    "image_encoder": {
        "vision_encoder": {
            "model_id": "InternViT",
            "num_layers": 24,
            "pipeline_num_layers": [24],
            "hidden_size": 1024,
            "ffn_hidden_size": 4096,
            "num_attention_heads": 16,
            "num_channels": 3,
            "patch_size": 14,
            "image_size": 448,
            "add_qkv_bias": true,
            "qk_layernorm": false,
            "activation_func": "gelu",
            "normalization": "LayerNorm",
            "layernorm_epsilon": 1e-6,
            "hidden_dropout": 0.0,
            "drop_path_rate": 0.0,
            "attention_dropout": 0.0,
            "init_method_std": 0.02,
            "initializer_factor": 1.0,
            "output_hidden_states": false,
            "use_return_dict": false,
            "params_dtype": "bf16",
            "post_layer_norm": false,
            "downsample_ratio": 0.5,
            "fp16": false,
            "bf16": true,
            "attention_softmax_in_fp32": false,
            "select_layer": -1,
            "ps_version": "v2",
            "pre_tockens": 2147483647,
            "next_tockens": 2147483647,
            "freeze": true
        },
        "vision_projector": {
            "model_id": "InternVLMLP",
            "downsample_ratio": 0.5,
            "vit_hidden_size": 1024,
            "llm_hidden_size": 2048,
            "params_dtype": "bf16",
            "num_layers": 1
        }
    },
    "text_encoder": null,
    "video_encoder": null
}