{
    "predictor": {
        "model_id": "stepvideodit",
        "from_pretrained": null,
        "dtype": "bf16",
        "num_layers": 48,
        "num_heads": 48,
        "head_dim": 128,
        "channel_split": [64, 32, 32],
        "in_channels": 64,
        "out_channels": 64,
        "dropout": 0.0,
        "patch_size": 1,
        "patch_size_thw": [1, 1, 1],
        "norm_type": "ada_norm_single",
        "norm_elementwise_affine": false,
        "norm_eps": 1e-6,
        "use_additional_conditions": false,
        "caption_channels": [6144, 1024]
    },
    "ae": {
        "model_id": "stepvideovae",
        "from_pretrained": "./weights/vae/vae_v2.safetensors",
        "dtype": "bf16",
        "z_channels": 64,
        "frame_len": 17,
        "version": 2
    },
    "tokenizer": [
        {
            "autotokenizer_name": "stepchat",
            "hub_backend": "hf",
            "from_pretrained": "./weights/step_llm/step1_chat_tokenizer.model",
            "model_max_length": 320
        },
        {
            "autotokenizer_name": "BertTokenizer",
            "hub_backend": "hf",
            "from_pretrained": "./weights/hunyuan_clip/tokenizer",
            "model_max_length": 77
        }
    ],
    "text_encoder": [
        {
            "model_id": "StepLLmModel",
            "hub_backend": "hf",
            "from_pretrained": "./weights/step_llm/",
            "dtype": "bf16"
        },
        {
            "model_id": "BertModel",
            "hub_backend": "hf",
            "from_pretrained": "./weights/hunyuan_clip/clip_text_encoder",
            "dtype": "float32"
        }
    ],

    "diffusion": {
        "model_id": "flow_match_discrete_scheduler",
        "num_train_timesteps": 1,
        "num_inference_timesteps": 50,
        "shift": 13.0,
        "reverse": false,
        "solver": "euler"
    },
    "pipeline_config": {
        "version": "stepvideo",
        "use_attention_mask": true,
        "input_size": [204, 544, 992],
        "guidance_scale": 9.0,
        "model_type": "t2v"
    },
    "unload_text_encoder": true,
    "frame_interval": 1,
    "save_path": "examples/stepvideo/t2v/t2v_result/",
    "fps": 25,
    "prompt": "examples/stepvideo/t2v/samples_prompts.txt",
    "num_inference_videos_per_sample": 1,
    "use_prompt_preprocess": false,
    "pipeline_class": "StepVideoPipeline",
    "device": "npu",
    "dtype": "bf16"
}