{
    "dataset_param": {
        "dataset_type": "multimodal",
        "basic_parameters": {
            "data_path": "/path/json_path",
            "data_folder": "/path/root_path",
            "repeat_time": 1
        },
        "preprocess_parameters": {
            "image_reader_type": "torchvision",
            "train_pipeline": {
                "image":[
                    {"trans_type": "Pad2Square", "param": {"mean": [0.485, 0.456, 0.406]}},
                    {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC", "antialias": null}},
                    {"trans_type": "ToTensor"},
                    {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}}
                ]
            }
        },
        "tokenizer_config": {
            "hub_backend": "hf",
            "autotokenizer_name": "AutoTokenizer",
            "from_pretrained": "OpenGVLab/InternVL2_5-4B",
            "add_eos_token": false,
            "use_fast": false
        },
        "use_text_processer": true,
        "template_name": "internvl2_5",
        "patch_size": 14,
        "image_size": 448,
        "down_sample_ratio": 0.5,
        "group_by_length": true,
        "dynamic_image_size": true,
        "use_thumbnail": true,
        "min_dynamic_patch": 1,
        "max_dynamic_patch": 6,
        "min_num_frame": 4,
        "max_num_frame": 12,
        "sampling_method": "rand"
    },
    "dataloader_param": {
        "dataloader_mode": "sampler",
        "shuffle": true,
        "drop_last": true,
        "pin_memory": true,
        "sampler_type": "BaseRandomBatchSampler",
        "collate_param": {
            "model_name": "internvl",
            "pad_id": 2
        }
    }
}