{
"dataset_param": {
"dataset_type": "dt2v",
"fps_max": 24,
"text_add_fps": true,
"basic_parameters": {
"data_path": "/your_data_folder/train_data.csv",
"data_folder": "/your_data_folder/",
"return_type": "dict"
},
"preprocess_parameters": {
"video_processor_type": "OpensoraVideoProcessor",
"video_reader_type": "AvVideo",
"image_reader_type": "torchvision",
"train_pipeline": {
"video": [
{"trans_type": "ToTensorVideo"},
{"trans_type": "ResizeCrop", "param": {"size": "auto"}},
{"trans_type": "norm_fun", "param": {"mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5], "inplace": true}}
],
"image": [
{"trans_type": "RandomHorizontalFlip"},
{"trans_type": "ToTensor"},
{"trans_type": "norm_fun", "param": {"mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5], "inplace": true}}
]
}
},
"use_text_processer": true,
"enable_text_preprocessing": false,
"tokenizer_config": [
{
"autotokenizer_name": "T5Tokenizer",
"hub_backend": "hf",
"from_pretrained": "Open-Sora-v2/google/t5-v1_1-xxl",
"model_max_length": 512
},
{
"autotokenizer_name": "CLIPTokenizer",
"hub_backend": "hf",
"from_pretrained": "Open-Sora-v2/openai/clip-vit-large-patch14",
"model_max_length": 77
}
],
"use_feature_data": false,
"vid_img_fusion_by_splicing": false,
"use_img_num": 4,
"use_img_from_vid": true,
"video_mask_ratios": {
"random": 0.05,
"intepolate": 0.005,
"quarter_random": 0.005,
"quarter_head": 0.005,
"quarter_tail": 0.005,
"quarter_head_tail": 0.005,
"image_random": 0.025,
"image_head": 0.05,
"image_tail": 0.025,
"image_head_tail": 0.025
}
},
"dataloader_param": {
"dataloader_mode": "variable",
"auto_gen_bucket": true,
"shuffle": true,
"drop_last": true,
"pin_memory": true,
"bucket_config": {
"256px": {"1": [1.0, 1], "33": [1.0, 1], "97": [1.0, 1], "129": [1.0, 1]}
}
}
}