{
"torch_npu":
{
"v2.1": [],
"v2.5": [],
"v2.6": ["npu_gelu_mul", "npu_clipped_swiglu", "npu_sim_exponential_"],
"v2.7": ["npu_gelu_mul", "npu_clipped_swiglu", "npu_sim_exponential_"],
"v2.8": ["npu_gelu_mul", "npu_clipped_swiglu", "npu_sim_exponential_"],
"v2.9": ["npu_gelu_mul", "npu_clipped_swiglu", "npu_sim_exponential_"],
"v2.10": ["npu_gelu_mul", "npu_clipped_swiglu", "npu_sim_exponential_"],
"all_version": [
"_npu_dropout",
"copy_memory_",
"empty_with_format",
"empty_with_swapped_memory",
"npu_alloc_float_status",
"npu_apply_adam",
"npu_advance_step_flashattn",
"npu_batch_gather_matmul",
"npu_batch_gather_matmul_",
"npu_bert_apply_adam",
"npu_clear_float_status",
"npu_cross_entropy_loss",
"npu_format_cast_",
"npu_fusion_attention",
"npu_get_float_status",
"npu_nms_rotated",
"npu_random_choice_with_mask",
"npu_rms_norm",
"npu_add_rms_norm_cast",
"npu_moe_compute_expert_tokens",
"npu_fused_infer_attention_score",
"npu_mla_prolog",
"npu_mla_prolog_v2",
"npu_mla_prolog_v3",
"npu_mla_prolog_v3_functional",
"npu_quant_lightning_indexer",
"npu_lightning_indexer",
"npu_sparse_flash_attention",
"npu_lightning_indexer_grad",
"npu_sparse_flash_attention_grad",
"npu_sparse_lightning_indexer_grad_kl_loss",
"npu_kv_quant_sparse_flash_attention",
"npu_convert_weight_to_int4pack",
"npu_ffn",
"npu_geglu",
"npu_grouped_matmul",
"npu_moe_finalize_routing",
"npu_quant_matmul",
"npu_quant_matmul_reduce_sum",
"npu_quant_scatter",
"npu_quantize",
"npu_dequant_bias",
"npu_group_quant",
"npu_dynamic_quant",
"npu_dynamic_quant_asymmetric",
"npu_scatter_nd_update_",
"npu_scatter_pa_kv_cache",
"npu_stride_copy",
"npu_gemma_rms_norm",
"npu_dequant_swiglu_quant",
"npu_swiglu",
"npu_gelu",
"npu_gelu_backward",
"npu_all_gather_base_mm",
"npu_mm_reduce_scatter_base",
"npu_prefetch",
"npu_quant_scatter_",
"npu_trans_quant_param",
"npu_top_k_top_p_sample",
"scatter_update",
"scatter_update_",
"npu_kronecker_quant",
"npu_group_norm_swish",
"npu_mrope",
"npu_grouped_matmul_finalize_routing",
"npu_grouped_matmul_swiglu_quant_v2",
"npu_recurrent_gated_delta_rule",
"npu_recurrent_gated_delta_rule_functional",
"npu_alltoallv_gmm",
"npu_gmm_alltoallv",
"npu_nsa_compress",
"npu_nsa_compress_infer",
"npu_nsa_compress_attention",
"npu_nsa_compress_attention_infer",
"npu_nsa_select_attention",
"npu_nsa_select_attention_infer",
"npu_transpose_batchmatmul",
"npu_gather_sparse_index",
"npu_moe_distribute_combine_add_rms_norm",
"npu_moe_update_expert",
"npu_dynamic_block_quant",
"attention_worker_scheduler_",
"attention_worker_scheduler",
"ffn_worker_scheduler_",
"ffn_worker_scheduler"
]
}
}