{
"device": "npu",
"use_ema": false,
"dtype": "bf16",
"text_input_ids_pad": true,
"load_video_features": false,
"load_text_features": false,
"allow_internal_format": false,
"ae": {
"model_id": "autoencoder_kl_hunyuanvideo",
"from_pretrained": "Open-Sora-v2/hunyuan_vae.safetensors",
"dtype": "bf16",
"latent_channels": 16,
"block_out_channels": [128, 256, 512, 512],
"layers_per_block": 2,
"in_channels": 3,
"norm_num_groups": 32,
"out_channels": 3,
"sample_size": 256,
"sample_tsize": 64,
"down_block_types": [
"DownEncoderBlockCausal3D",
"DownEncoderBlockCausal3D",
"DownEncoderBlockCausal3D",
"DownEncoderBlockCausal3D"
],
"up_block_types": [
"UpDecoderBlockCausal3D",
"UpDecoderBlockCausal3D",
"UpDecoderBlockCausal3D",
"UpDecoderBlockCausal3D"
],
"scaling_factor": 0.476986,
"time_compression_ratio": 4,
"mid_block_add_attention": true,
"act_fn": "silu",
"use_spatial_tiling": true,
"use_temporal_tiling": false
},
"text_encoder": [
{
"model_id": "T5",
"hub_backend": "hf",
"from_pretrained": "Open-Sora-v2/google/t5-v1_1-xxl",
"dtype": "bf16",
"use_attention_mask": false,
"low_cpu_mem_usage": true,
"ucg_rate": 0.1
},
{
"model_id": "CLIP",
"hub_backend": "hf",
"from_pretrained": "Open-Sora-v2/openai/clip-vit-large-patch14",
"dtype": "bf16",
"use_attention_mask": false,
"output_key": "pooler_output",
"low_cpu_mem_usage": true,
"ucg_rate": 0.1
}
],
"predictor": {
"dtype": "bf16",
"model_id": "mmdit",
"from_pretrained": null,
"patch_size": 2,
"in_channels": 64,
"hidden_size": 3072,
"num_heads": 24,
"mm_double_blocks_depth": 19,
"mm_single_blocks_depth": 38,
"mlp_ratio": 4.0,
"vec_in_dim": 768,
"context_in_dim": 4096,
"axes_dim": [16, 56, 56],
"attention_q_bias":true,
"attention_k_bias":true,
"attention_v_bias":true,
"fused_qkv": false,
"guidance_embed": false,
"double_stream_full_recompute_layers": 19,
"single_stream_full_recompute_layers": 38,
"use_liger_rope": true
},
"diffusion": {
"model_id": "opensora2_flow_match_scheduler",
"use_timestep_transform": true
}
}