{
"dataset_param": {
"dataset_type": "multimodal",
"basic_parameters": {
"data_path": "/path/json_path",
"data_folder": "/path/root_path",
"repeat_time": 1
},
"preprocess_parameters": {
"image_reader_type": "torchvision",
"train_pipeline": {
"image":[
{"trans_type": "Pad2Square", "param": {"mean": [0.485, 0.456, 0.406]}},
{"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC", "antialias": null}},
{"trans_type": "ToTensor"},
{"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}}
]
}
},
"tokenizer_config": {
"hub_backend": "hf",
"autotokenizer_name": "AutoTokenizer",
"from_pretrained": "OpenGVLab/InternVL2_5-4B",
"add_eos_token": false,
"use_fast": false
},
"use_text_processer": true,
"template_name": "internvl2_5",
"patch_size": 14,
"image_size": 448,
"down_sample_ratio": 0.5,
"group_by_length": true,
"dynamic_image_size": true,
"use_thumbnail": true,
"min_dynamic_patch": 1,
"max_dynamic_patch": 6,
"min_num_frame": 4,
"max_num_frame": 12,
"sampling_method": "rand"
},
"dataloader_param": {
"dataloader_mode": "sampler",
"shuffle": true,
"drop_last": true,
"pin_memory": true,
"sampler_type": "BaseRandomBatchSampler",
"collate_param": {
"model_name": "internvl",
"pad_id": 2
}
}
}