{
    "frames": 37,
    "resolution": [480, 720],
    "allow_internal_format":false,
    "load_video_features": false,
    "load_text_features": false,
    "task": "t2v",
    "enable_encoder_dp": true,
    "predictor": {
        "model_id": "satdit",
        "from_pretrained": null,
        "dtype": "bf16",
        "num_layers": 42,
        "pipeline_num_layers":null,
        "num_heads": 48,
        "head_dim": 64,
        "in_channels": 16,
        "out_channels": 16,
        "dropout": 0.0,
        "cross_attention_dim": null,
        "attention_bias": true,
        "input_size": [6, 96, 170],
        "patch_type": "3D",
        "patch_size": [2, 2, 2],
        "activation_fn": "gelu-approximate",
        "num_embeds_ada_norm": 1000,
        "norm_type": "qk_ln",
        "norm_elementwise_affine": true,
        "norm_eps": 1e-5,
        "caption_channels": null,
        "time_embed_dim": 512,
        "text_length": 226,
        "text_hidden_size": 4096,
        "concat_text_embed": true,
        "interpolation_scale": [1.0, 1.0, 1.0],
        "use_rope": true
    },
    "diffusion": {
        "model_id": "cogvideo_diffusion",
        "sigma_sampler_config": {
            "uniform_sampling": true,
            "num_idx": 1000,
            "discretization_config":{
                "shift_scale": 1.0
            }
        },
        "denoiser_config": {
            "num_idx": 1000,
            "quantize_c_noise": false,
            "discretization_config":{
                "shift_scale": 1.0
            }
        }
    },
    "text_encoder": {
        "model_id": "T5",
        "hub_backend": "hf",
        "from_pretrained": "5b-cogvideo",
        "dtype": "bf16",
        "load_in_8bit": false,
        "low_cpu_mem_usage": true,
        "ucg_rate": 0.1,
        "use_attention_mask": false
    },
   "ae": {
        "model_id": "contextparallelcasualvae",
        "from_pretrained": "3d-vae.pt",
        "cp_size": 1,
        "dtype": "bf16",
        "z_channels": 16,
        "conv_padding": 0,
        "num_res_blocks": 3,
        "hidden_size_mult": [1,2,2,4],
        "use_tiling": false,
        "encoder_attention": "",
        "encoder_nonlinearity": "swish",
        "encoder_conv_in": "ContextParallelCausalConv3d",
        "encoder_conv_out": "ContextParallelCausalConv3d",
        "encoder_mid_resnet": "ContextParallelResnetBlock3D",
        "encoder_resnet_blocks": [
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D"
        ],
        "encoder_spatial_downsample": [
            "DownSample3D",
            "DownSample3D",
            "DownSample3D",
            ""
        ],
        "encoder_temporal_downsample": [
            "",
            "",
            "",
            ""
        ],
        "decoder_attention": "",
        "decoder_nonlinearity": "swish",
        "decoder_conv_in": "ContextParallelCausalConv3d",
        "decoder_conv_out": "ContextParallelCausalConv3d",
        "decoder_mid_resnet": "ContextParallelResnetBlock3D",
        "decoder_resnet_blocks": [
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D",
            "ContextParallelResnetBlock3D"
        ],
        "decoder_spatial_upsample": [
            "",
            "Upsample3D",
            "Upsample3D",
            "Upsample3D"
        ],
        "decoder_temporal_upsample": [
            "",
            "",
            "",
            ""
        ],
        "encoder_gather_norm": true,
        "decoder_gather_norm": true,
        "use_quant_layer": false
    }
}