{
  "frames": 17,
  "resolution": [480, 720],
  "allow_internal_format": false,
  "load_video_features": false,
  "load_text_features": false,
  "task": "t2v",
  "enable_encoder_dp": false,
  "predictor": {
    "model_id": "satdit",
    "from_pretrained": null,
    "dtype": "bf16",
    "num_layers": 42,
    "pipeline_num_layers": null,
    "num_heads": 48,
    "head_dim": 64,
    "in_channels": 16,
    "out_channels": 16,
    "dropout": 0.0,
    "cross_attention_dim": null,
    "attention_bias": true,
    "input_size": [7, 60, 90],
    "patch_size": [1, 2, 2],
    "activation_fn": "gelu-approximate",
    "num_embeds_ada_norm": 1000,
    "norm_type": "qk_ln",
    "norm_elementwise_affine": true,
    "norm_eps": 1e-5,
    "caption_channels": null,
    "time_embed_dim": 512,
    "text_length": 226,
    "text_hidden_size": 4096,
    "concat_text_embed": true,
    "interpolation_scale": [1.0, 1.0, 1.0],
    "use_rope": true
  },
  "diffusion": {
    "model_id": "cogvideo_diffusion",
    "sigma_sampler_config": {
      "uniform_sampling": true,
      "num_idx": 1000,
      "discretization_config": {
        "shift_scale": 1.0
      }
    },
    "denoiser_config": {
      "num_idx": 1000,
      "quantize_c_noise": false,
      "discretization_config": {
        "shift_scale": 1.0
      }
    }
  },
  "text_encoder": {
    "model_id": "T5",
    "hub_backend": "hf",
    "from_pretrained": "5b-cogvideo",
    "dtype": "bf16",
    "load_in_8bit": false,
    "low_cpu_mem_usage": true,
    "ucg_rate": 0.1,
    "use_attention_mask": false
  },
  "ae": {
    "model_id": "contextparallelcasualvae",
    "from_pretrained": "3d-vae.pt",
    "cp_size": 1,
    "dtype": "bf16",
    "z_channels": 16,
    "conv_padding": 0,
    "num_res_blocks": 3,
    "hidden_size_mult": [1, 2, 2, 4],
    "use_tiling": false,
    "encoder_attention": "",
    "encoder_nonlinearity": "swish",
    "encoder_conv_in": "ContextParallelCausalConv3d",
    "encoder_conv_out": "ContextParallelCausalConv3d",
    "encoder_mid_resnet": "ContextParallelResnetBlock3D",
    "encoder_resnet_blocks": [
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D"
    ],
    "encoder_spatial_downsample": [
      "DownSample3D",
      "DownSample3D",
      "DownSample3D",
      ""
    ],
    "encoder_temporal_downsample": [
      "",
      "",
      "",
      ""
    ],
    "decoder_attention": "",
    "decoder_nonlinearity": "swish",
    "decoder_conv_in": "ContextParallelCausalConv3d",
    "decoder_conv_out": "ContextParallelCausalConv3d",
    "decoder_mid_resnet": "ContextParallelResnetBlock3D",
    "decoder_resnet_blocks": [
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D",
      "ContextParallelResnetBlock3D"
    ],
    "decoder_spatial_upsample": [
      "",
      "Upsample3D",
      "Upsample3D",
      "Upsample3D"
    ],
    "decoder_temporal_upsample": [
      "",
      "",
      "",
      ""
    ],
    "encoder_gather_norm": true,
    "decoder_gather_norm": true,
    "use_quant_layer": false
  }
}