{
  "dataset_param": {
    "dataset_type": "bagel",
    "preprocess_parameters": {},
    "basic_parameters": {
      "available_data": ["vlm_sft", "t2i_pretrain"],
      "t2i_pretrain": {
        "dataset_names": ["t2i"],
        "t2i": {
          "data_dir": "./t2i",
          "num_files": 8,
          "num_total_samples": 833
        },
        "image_transform_args": {
          "image_stride": 16,
          "max_image_size": 1024,
          "min_image_size": 512
        },
        "is_mandatory": true,
        "num_used_data": [8],
        "weight": 1
      },
      "vlm_sft": {
        "dataset_names": ["llava_ov"],
        "llava_ov": {
          "data_dir": "./train2017",
          "jsonl_path": "./coco.jsonl",
          "num_total_samples": 2000
        },
        "image_transform_args": {
          "image_stride": 14,
          "max_image_size": 980,
          "min_image_size": 378,
          "max_pixels": 2007040
        },
        "is_mandatory": true,
        "shuffle_lines": false,
        "shuffle_seed": 0,
        "num_used_data": [2000],
        "weight": 1
      },
      "model_path": "./weight/BAGEL-7B-MoT",
      "text_cond_dropout_prob": 0.0,
      "vae_cond_dropout_prob": 0.0,
      "vit_cond_dropout_prob": 0.0,
      "vit_patch_size": 14,
      "max_num_patch_per_side": 70,
      "max_latent_size": 64,
      "vae_image_downsample": 16,
      "data_seed": 42,
      "num_workers": 1,
      "interpolate_pos": false,
      "use_flex": false,
      "data_status": null
    },
    "packed_parameters": {
      "expected_num_tokens": 10240,
      "max_num_tokens_per_sample": 10240,
      "max_num_tokens": 11520,
      "prefer_buffer_before": 16384,
      "max_buffer_size": 50
    }
  },
  "dataloader_param": {
    "dataloader_mode": "base",
    "drop_last": true,
    "seed": 42,
    "batch_size": 1,
    "pin_memory": true,
    "collate_param": {
      "model_name": "bagel"
    },
    "shuffle": null,
    "prefetch_factor": 2
  }
}