---
# Sharded-training wrap settings for the WanDiT predictor blocks.
# NOTE(review): the key names suggest an FSDP-style (fully sharded data
# parallel) setup; confirm each meaning against the loader that reads
# this file, which is not visible from here.

# Presumably the number of ranks in one sharding group - TODO confirm.
sharding_size: 8

# Dotted Python paths; looks like each listed module class is wrapped as
# its own sharded unit - verify against the consumer.
sub_modules_to_wrap:
  - mindspeed_mm.models.predictor.dits.wan_dit.WanDiTBlock

# Presumably releases the gathered parameters after forward and gathers
# them again for backward, trading communication for memory - confirm.
reshard_after_forward: true

# Presumed mixed-precision policy: dtype used for parameters in compute,
# dtype used for gradient reduction, and whether forward inputs are cast
# to the parameter dtype - TODO confirm against the consumer.
param_dtype: "bf16"
reduce_dtype: "fp32"
cast_forward_inputs: true