{
"load_video_features": true,
"load_text_features": true,
"enable_encoder_dp": false,
"predictor": {
"model_id": "stepvideodit",
"from_pretrained": null,
"dtype": "bf16",
"num_layers" : 24,
"num_heads": 48,
"head_dim": 128,
"channel_split": [64, 32, 32],
"in_channels":64,
"out_channels":64,
"patch_size": 1,
"patch_size_thw": [1, 1, 1],
"norm_elementwise_affine": false,
"norm_eps": 1e-6,
"attention_norm_type": "rmsnorm",
"attention_norm_elementwise_affine": true,
"attention_norm_eps": 1e-6,
"fa_layout": "bsnd",
"use_additional_conditions": false,
"caption_channels": [6144, 1024]
},
"diffusion": {
"model_id": "flow_match_discrete_scheduler",
"num_train_timesteps":1000,
"shift": 13.0,
"reverse": false,
"solver": "euler"
},
"text_encoder": [
{
"model_id": "StepLLmModel",
"hub_backend": "hf",
"from_pretrained": "./weights/step_llm/",
"dtype": "bf16"
},
{
"model_id": "BertModel",
"hub_backend": "hf",
"from_pretrained": "./weights/hunyuan_clip/clip_text_encoder",
"dtype": "float32"
}
],
"ae": {
"model_id": "stepvideovae",
"from_pretrained": "./weights/vae/vae_v2.safetensors",
"dtype": "bf16",
"z_channels": 64,
"frame_len": 17,
"version": 2
}
}