{

    "load_video_features": true,

    "load_text_features": true,

    "enable_encoder_dp": false,

    "predictor": {

        "model_id": "stepvideodit",

        "from_pretrained": null,

        "dtype": "bf16",

        "num_layers" : 24,

        "num_heads": 48,

        "head_dim": 128,

        "channel_split": [64, 32, 32],

        "in_channels":64,

        "out_channels":64,

        "patch_size": 1,

        "patch_size_thw": [1, 1, 1],

        "norm_elementwise_affine": false,

        "norm_eps": 1e-6,

        "attention_norm_type": "rmsnorm",

        "attention_norm_elementwise_affine": true,

        "attention_norm_eps": 1e-6,

        "fa_layout": "bsnd",

        "use_additional_conditions": false,

        "caption_channels": [6144, 1024]

    },

    "diffusion": {

        "model_id": "flow_match_discrete_scheduler",

        "num_train_timesteps":1000,

        "shift": 13.0,

        "reverse": false,

        "solver": "euler"

    },

    "text_encoder": [

        {

            "model_id": "StepLLmModel",

            "hub_backend": "hf",

            "from_pretrained": "./weights/step_llm/",

            "dtype": "bf16"

        },

        {

            "model_id": "BertModel",

            "hub_backend": "hf",

            "from_pretrained": "./weights/hunyuan_clip/clip_text_encoder",

            "dtype": "float32"

        }

    ],

    "ae": {

        "model_id": "stepvideovae",

        "from_pretrained": "./weights/vae/vae_v2.safetensors",

        "dtype": "bf16",

        "z_channels": 64,

        "frame_len": 17,

        "version": 2

    }

}