{
"patch": {
"get_dist_model_load_from_pt": true
},
"load_video_features": false,
"load_text_features": false,
"task": "i2v",
"diffusion": {
"model_id": "wan_flow_match_scheduler",
"num_train_timesteps": 1000,
"shift": 5,
"sigma_min": 0.0,
"extra_one_step": true
},
"predictor": {
"model_id": "wandit",
"dtype": "bf16",
"model_type": "ti2v",
"patch_size": [1, 2, 2],
"text_len": 512,
"in_dim": 48,
"hidden_size": 3072,
"ffn_dim": 14336,
"freq_dim": 256,
"text_dim": 4096,
"img_dim": 1280,
"out_dim": 48,
"num_heads": 24,
"num_layers": 30,
"pipeline_num_layers": [30],
"qk_norm": true,
"qk_norm_type": "rmsnorm",
"cross_attn_norm": true,
"eps": 1e-6,
"max_seq_len": 1024,
"attention_async_offload": false,
"use_fused_rmsnorm":true,
"seperated_timestep": true
},
"text_encoder": {
"model_id": "UMT5",
"hub_backend": "hf",
"from_pretrained": "Wan2.2-TI2V-5B-Diffusers/text_encoder/",
"dtype": "bf16"
},
"ae": {
"model_id": "wan_video_vae",
"from_pretrained": "Wan2.2-TI2V-5B-Diffusers/vae",
"dtype": "bf16",
"enable_tiling": false,
"tiling_param": {
"tile_sample_min_height": 256,
"tile_sample_min_width": 256,
"tile_sample_stride_height": 192,
"tile_sample_stride_width": 192
},
"norm_latents": true,
"norm_mode": "channel_specified_shift_scale",
"do_sample": false,
"i2v_processor": {
"processor_id": "wan_ti2v_processor",
"i2v_vae_encode_tiling": false
}
}
}