{
    "infer_data_type": "image",
    "file_path": "./examples/internvl2.5/view.jpg",
    "prompts": "Please describe the image shortly.",
    "pipeline_class": "InternVLPipeline",
    "from_pretrained": "./ckpt/model_optim_rng.pt",
    "template": "internvl2_5",
    "dtype": "bf16",
    "device": "npu",
    "pre_process": true,
    "post_process": true,
    "add_text_encoder": false,
    "img_embedding_idx": 1,
    "downsample_ratio": 0.5,
    "select_layer": -1,
    "ps_version": "v2",
    "img_context_token_id": 151667,
    "num_segments": 8,
    "text_decoder": {
        "model_id": "Qwen2.5llm",
        "num_layers": 36,
        "hidden_size": 2048,
        "num_attention_heads": 16,
        "num_query_groups": 2,
        "ffn_hidden_size": 11008,
        "kv_channels": 128,
        "hidden_dropout": 0.0,
        "attention_dropout": 0.0,
        "layernorm_epsilon": 1e-06,
        "normalization": "RMSNorm",
        "qk_layernorm": false,
        "add_bias_linear": false,
        "add_qkv_bias": true,
        "bias_activation_fusion": false,
        "gated_linear_unit": true,
        "init_method_std": 0.01,
        "attention_softmax_in_fp32": true,
        "masked_softmax_fusion": false,
        "layernorm_zero_centered_gamma": false,
        "bias_dropout_fusion": false,
        "apply_rope_fusion": false,
        "memory_efficient_layer_norm": false,
        "max_position_embeddings": 4096,
        "fp16": false,
        "bf16": true,
        "params_dtype": "bf16",
        "fp16_lm_cross_entropy": false,
        "rotary_percent": 1.0,
        "rotary_base": 1000000,
        "position_embedding_type": "rope",
        "use_fused_rotary_pos_emb": false,
        "rope_scaling": null,
        "parallel_output": true,
        "initializer_factor": 1.0,
        "activation_func": "silu",
        "vocab_size": 151674,
        "is_encoder_decoder": false
    },
    "image_encoder": {
        "vision_encoder": {
            "model_id": "InternViT",
            "num_layers": 24,
            "hidden_size": 1024,
            "ffn_hidden_size": 4096,
            "num_attention_heads": 16,
            "num_channels": 3,
            "patch_size": 14,
            "image_size": 448,
            "add_qkv_bias": true,
            "qk_layernorm": false,
            "activation_func": "gelu",
            "normalization": "LayerNorm",
            "layernorm_epsilon": 1e-06,
            "hidden_dropout": 0.0,
            "drop_path_rate": 0.0,
            "attention_dropout": 0.0,
            "init_method_std": 0.02,
            "initializer_factor": 1.0,
            "output_hidden_states": false,
            "use_return_dict": false,
            "params_dtype": "bf16",
            "post_layer_norm": false,
            "downsample_ratio": 0.5,
            "fp16": false,
            "bf16": true,
            "attention_softmax_in_fp32": false,
            "select_layer": -1,
            "ps_version": "v2",
            "pre_tockens": 2147483647,
            "next_tockens": 2147483647,
            "freeze": true
        },
        "vision_projector": {
            "model_id": "InternVLMLP",
            "downsample_ratio": 0.5,
            "vit_hidden_size": 1024,
            "llm_hidden_size": 2048
        }
    },
    "tokenizer": {
        "hub_backend": "hf",
        "autotokenizer_name": "AutoTokenizer",
        "from_pretrained": "OpenGVLab/InternVL2_5-4B",
        "add_eos_token": false,
        "use_fast": false
    },
    "generation_config": {
        "do_sample": false,
        "bos_token_id": 151643,
        "eos_token_id": 151645,
        "pad_token_id": null,
        "max_length": 20,
        "max_new_tokens": 1024,
        "temperature": 1.0,
        "output_attentions": false,
        "output_hidden_states": false,
        "use_cache": false,
        "decoder_start_token_id": null,
        "min_new_tokens": null,
        "min_length": 0,
        "constraints": null,
        "num_beams": 1,
        "force_words_ids": null,
        "top_k": 50,
        "top_p": 1.0,
        "prompt_lookup_num_tokens": null,
        "guidance_scale": null,
        "bad_words_ids": null,
        "begin_suppress_tokens": null,
        "diversity_penalty": 0.0,
        "early_stopping": false,
        "encoder_no_repeat_ngram_size": 0,
        "encoder_repetition_penalty": 1.0,
        "epsilon_cutoff": 0.0,
        "eta_cutoff": 0.0,
        "exponential_decay_length_penalty": null,
        "forced_bos_token_id": null,
        "forced_decoder_ids": null,
        "forced_eos_token_id": null,
        "length_penalty": 1.0,
        "low_memory": null,
        "max_time": null,
        "no_repeat_ngram_size": 0,
        "num_assistant_tokens": 5,
        "num_assistant_tokens_schedule": "heuristic",
        "num_beam_groups": 1,
        "num_return_sequences": 1,
        "output_scores": false,
        "penalty_alpha": null,
        "remove_invalid_values": false,
        "renormalize_logits": false,
        "repetition_penalty": 1.0,
        "return_dict_in_generate": false,
        "sequence_bias": null,
        "suppress_tokens": null,
        "typical_p": 1.0
    },
    "text_encoder": null,
    "video_encoder": null
}