{
  "predictor": {
    "model_id": "stepvideodit",
    "from_pretrained": null,
    "dtype": "bf16",
    "num_layers": 48,
    "num_heads": 48,
    "head_dim": 128,
    "channel_split": [64, 32, 32],
    "in_channels": 64,
    "out_channels": 64,
    "dropout": 0.0,
    "patch_size": 1,
    "patch_size_thw": [1, 1, 1],
    "norm_type": "ada_norm_single",
    "norm_elementwise_affine": false,
    "norm_eps": 1e-6,
    "use_additional_conditions": true,
    "caption_channels": [6144, 1024],
    "attention_norm_type": "rmsnorm",
    "attention_norm_elementwise_affine": true,
    "attention_norm_eps": 1e-6,
    "fa_layout": "bsnd",
    "masked_softmax_fusion": false
  },
  "ae": {
    "model_id": "stepvideovae",
    "from_pretrained": "./weights/vae/vae_v2.safetensors",
    "dtype": "bf16",
    "z_channels": 64,
    "frame_len": 17,
    "version": 2
  },
  "tokenizer": [
    {
      "autotokenizer_name": "stepchat",
      "hub_backend": "hf",
      "from_pretrained": "./weights/step_llm/step1_chat_tokenizer.model",
      "model_max_length": 320
    },
    {
      "autotokenizer_name": "BertTokenizer",
      "hub_backend": "hf",
      "from_pretrained": "./weights/hunyuan_clip/tokenizer",
      "model_max_length": 77
    }
  ],
  "text_encoder": [
    {
      "model_id": "StepLLmModel",
      "hub_backend": "hf",
      "from_pretrained": "./weights/step_llm/",
      "dtype": "bf16"
    },
    {
      "model_id": "BertModel",
      "hub_backend": "hf",
      "from_pretrained": "./weights/hunyuan_clip/clip_text_encoder",
      "dtype": "float32"
    }
  ],
  "diffusion": {
    "model_id": "flow_match_discrete_scheduler",
    "num_train_timesteps": 1,
    "num_inference_timesteps": 50,
    "shift": 13.0,
    "reverse": false,
    "solver": "euler"
  },
  "pipeline_config": {
    "version": "stepvideo",
    "use_attention_mask": true,
    "input_size": [102, 544, 992],
    "guidance_scale": 9.0,
    "model_type": "i2v",
    "motion_score": 5.0
  },
  "unload_text_encoder": true,
  "frame_interval": 1,
  "save_path": "examples/stepvideo/i2v/i2v_result/",
  "fps": 25,
  "prompt": "examples/stepvideo/i2v/samples_i2v_prompts.txt",
  "image": "examples/stepvideo/i2v/samples_i2v_images.txt",
  "num_inference_videos_per_sample": 1,
  "use_prompt_preprocess": false,
  "pipeline_class": "StepVideoPipeline",
  "device": "npu",
  "dtype": "bf16"
}