---
# Name-mapping table, grouped into sections.
#
# Each top-level key is a section header; these look like layer/module class
# names (TODO confirm against the code that loads this file). Each nested
# entry maps the name on the left to the name on the right.
# NOTE(review): which side belongs to which framework is not visible here;
# verify the mapping direction with the consumer.
# NOTE(review): dotted names (e.g. `a.b`) presumably address a nested member;
# confirm how the consumer splits them.

TopLayer:
  network_with_loss: module

VocabParallelEmbedding:
  logical_or: __or__
  reduce_from_mp_region.all_reduce: all_reduce

ParallelTransformerLayer:
  attention: self_attention

ParallelAttention:
  flash_attention_score: core_attention_flash.npu_fusion_attention

FusedRMSNorm:
  RmsNorm: npu_rms_norm