{
    "ae": {
        "model_id": "casualvae",
        "from_pretrained": "./pretrain_models/opensoraplan_vae1_2/checkpoint.ckpt",
        "dtype": "bf16",
        "output_dtype": "bf16",
        "decoder_spatial_upsample": [
            "",
            "SpatialUpsample2x",
            "Spatial2xTime2x3DUpsample",
            "Spatial2xTime2x3DUpsample"
        ],
        "decoder_temporal_upsample": [
            "",
            "",
            "",
            ""
        ],
        "encoder_conv_in": "Conv2d",
        "encoder_resnet_blocks": [
            "ResnetBlock2D",
            "ResnetBlock2D",
            "ResnetBlock3D",
            "ResnetBlock3D"
        ],
        "encoder_spatial_downsample": [
            "Downsample",
            "Spatial2xTime2x3DDownsample",
            "Spatial2xTime2x3DDownsample",
            ""
        ],
        "encoder_temporal_downsample": [
            "",
            "",
            "",
            ""
        ],
        "use_tiling": true,
        "tile_overlap_factor": 0.125,
        "vae_scale_factor":[4,8,8],
        "tile_sample_min_size":512,
        "tile_latent_min_size":64,
        "tile_sample_min_size_t":29,
        "tile_latent_min_size_t":8
    },
    "text_encoder": {
       "hub_backend": "hf",
        "model_id": "T5",
        "from_pretrained": "./pretrain_models/text_encoder",
        "low_cpu_mem_usage": true,
        "dtype": "fp32"
    },
    "tokenizer":{
        "hub_backend": "hf",
        "autotokenizer_name": "AutoTokenizer",
        "from_pretrained": "./pretrain_models/tokenizer"
    },
    "predictor": {
        "dtype": "bf16",
        "model_id": "ptdit",
        "num_heads": 16,
        "head_dim": 72,
        "in_channels": 4,
        "out_channels": 8,
        "num_layers": 28,
        "dropout": 0.0,
        "norm_num_groups": 32,
        "cross_attention_dim": 1152,
        "attention_bias": true,
        "sample_size": 32,
        "frame": 1,
        "patch_size": 2,
        "activation_fn": "gelu-approximate",
        "num_embeds_ada_norm": 1000,
        "upcast_attention": false,
        "norm_type": "ada_norm_single",
        "norm_elementwise_affine": false,
        "norm_eps": 1e-06,
        "attention_type": "default",
        "caption_channels": 4096,
        "compress_ratios": [1, 8, 8],
        "proxy_compress_ratios": [1, 8, 8],
        "shift_window": true,
        "from_pretrained": "./pretrain_models/XXX.pt"
    },
    "diffusion": {
        "model_id": "rflow",
        "use_timestep_transform":true,
        "num_inference_steps":30,
        "cfg_scale":4.0,
        "sample_method": "logit-normal"
    },
    "pipeline_config": {
        "use_attention_mask": true,
        "use_y_embedder": false,
        "input_size": [1, 1024, 1024]
    },
    "frame_interval": 1,
    "model_max_length":200,
    "save_path":"./samples/test/image/",
    "fps":24,
    "prompt":"examples/qihoo_t2x/demo.txt",
    "pipeline_class": "QihooPipeline",
    "device":"npu",
    "dtype": "bf16"
}