自定义API参考
-
-
- torch_npu接口列表
- (beta)torch_npu._npu_dropout
- (beta)torch_npu.copy_memory_
- (beta)torch_npu.empty_with_format
- (beta)torch_npu.fast_gelu
- (beta)torch_npu.npu_alloc_float_status
- (beta)torch_npu.npu_anchor_response_flags
- (beta)torch_npu.npu_apply_adam
- (beta)torch_npu.npu_batch_nms
- (beta)torch_npu.npu_bert_apply_adam
- (beta)torch_npu.npu_bmmV2
- (beta)torch_npu.npu_bounding_box_decode
- (beta)torch_npu.npu_bounding_box_encode
- (beta)torch_npu.npu_broadcast
- (beta)torch_npu.npu_ciou
- (beta)torch_npu.npu_clear_float_status
- (beta)torch_npu.npu_confusion_transpose
- (beta)torch_npu.npu_conv_transpose2d
- (beta)torch_npu.npu_conv2d
- (beta)torch_npu.npu_conv3d
- (beta)torch_npu.npu_convolution
- (beta)torch_npu.npu_convolution_transpose
- (beta)torch_npu.npu_deformable_conv2d
- (beta)torch_npu.npu_diou
- (beta)torch_npu.npu_dtype_cast
- (beta)torch_npu.npu_format_cast
- (beta)torch_npu.npu_format_cast_
- (beta)torch_npu.npu_get_float_status
- (beta)torch_npu.npu_giou
- (beta)torch_npu.npu_grid_assign_positive
- (beta)torch_npu.npu_gru
- (beta)torch_npu.npu_indexing
- (beta)torch_npu.npu_iou
- (beta)torch_npu.npu_layer_norm_eval
- (beta)torch_npu.npu_linear
- (beta)torch_npu.npu_lstm
- (beta)torch_npu.npu_max
- (beta)torch_npu.npu_min
- (beta)torch_npu.npu_mish
- (beta)torch_npu.npu_nms_rotated
- (beta)torch_npu.npu_nms_v4
- (beta)torch_npu.npu_nms_with_mask
- (beta)torch_npu.npu_one_hot
- (beta)torch_npu.npu_pad
- (beta)torch_npu.npu_ps_roi_pooling
- (beta)torch_npu.npu_ptiou
- (beta)torch_npu.npu_random_choice_with_mask
- (beta)torch_npu.npu_reshape
- (beta)torch_npu.npu_roi_align
- (beta)torch_npu.npu_rotated_iou
- (beta)torch_npu.npu_rotated_overlaps
- (beta)torch_npu.npu_sign_bits_pack
- (beta)torch_npu.npu_sign_bits_unpack
- (beta)torch_npu.npu_silu
- (beta)torch_npu.npu_slice
- (beta)torch_npu.npu_softmax_cross_entropy_with_logits
- (beta)torch_npu.npu_sort_v2
- (beta)torch_npu.npu_transpose
- (beta)torch_npu.npu_yolo_boxes_encode
- (beta)torch_npu.npu_fused_attention_score
- (beta)torch_npu.npu_multi_head_attention
- (beta)torch_npu.npu_rms_norm
- (beta)torch_npu.npu_dropout_with_add_softmax
- torch_npu.npu_rotary_mul
- torch_npu.npu_scaled_masked_softmax
- (beta)torch_npu.npu_swiglu
- (beta)torch_npu.one_
- torch_npu.npu_group_norm_swish
- torch_npu.npu_cross_entropy_loss
- torch_npu.npu_advance_step_flashattn
- torch_npu.npu_all_gather_base_mm
- torch_npu.npu_anti_quant
- torch_npu.npu_convert_weight_to_int4pack
- torch_npu.npu_dynamic_quant
- torch_npu.npu_dynamic_quant_asymmetric
- torch_npu.npu_fast_gelu
- torch_npu.npu_ffn
- torch_npu.npu_fused_infer_attention_score
- torch_npu.npu_fused_infer_attention_score_v2
- torch_npu.npu_fusion_attention
- torch_npu.npu_gelu
- torch_npu.npu_gelu_mul
- torch_npu.npu_group_norm_silu
- torch_npu.npu_group_quant
- torch_npu.npu_grouped_matmul
- torch_npu.npu_grouped_matmul_finalize_routing
- torch_npu.npu_grouped_matmul_swiglu_quant_v2
- torch_npu.npu_incre_flash_attention
- torch_npu.npu_kv_quant_sparse_flash_attention
- torch_npu.npu_lightning_indexer
- torch_npu.npu_mla_prolog
- torch_npu.npu_mla_prolog_v2
- (beta)torch_npu.npu_mla_prolog_v3
- torch_npu.npu_mm_all_reduce_base
- torch_npu.npu_mm_reduce_scatter_base
- torch_npu.npu_moe_compute_expert_tokens
- torch_npu.npu_moe_finalize_routing
- torch_npu.npu_moe_gating_top_k_softmax
- torch_npu.npu_moe_init_routing
- torch_npu.npu_prefetch
- torch_npu.npu_prompt_flash_attention
- torch_npu.npu_quant_lightning_indexer
- torch_npu.npu_quant_matmul
- torch_npu.npu_quant_matmul_reduce_sum
- torch_npu.npu_quant_scatter
- torch_npu.npu_quant_scatter_
- torch_npu.npu_quantize
- torch_npu.npu_recurrent_gated_delta_rule
- torch_npu.npu_scatter_nd_update
- torch_npu.npu_scatter_nd_update_
- torch_npu.npu_sparse_flash_attention
- torch_npu.npu_sparse_lightning_indexer_grad_kl_loss
- torch_npu.npu_top_k_top_p
- torch_npu.npu_top_k_top_p_sample
- torch_npu.npu_scatter_pa_kv_cache
- torch_npu.npu_trans_quant_param
- torch_npu.npu_weight_quant_batchmatmul
- torch_npu.scatter_update
- torch_npu.scatter_update_
- torch_npu.empty_with_swapped_memory
- torch_npu.erase_stream
- torch_npu.npu_gather_sparse_index
- torch_npu.npu_moe_distribute_combine
- torch_npu.npu_moe_distribute_dispatch
- torch_npu.npu_moe_distribute_combine_v2
- torch_npu.npu_moe_distribute_dispatch_v2
- torch_npu.npu_moe_gating_top_k
- torch_npu.npu_moe_init_routing_v2
- torch_npu.npu_swiglu_quant
- torch_npu.npu_dequant_swiglu_quant
- torch_npu.npu_kv_rmsnorm_rope_cache
- torch_npu.npu_interleave_rope
- torch_npu.npu_moe_re_routing
- torch_npu.matmul_checksum
- torch_npu.npu_alltoallv_gmm
- torch_npu.npu_gmm_alltoallv
- torch_npu.npu_moe_distribute_combine_add_rms_norm
- torch_npu.npu_transpose_batchmatmul
- torch_npu.npu_moe_update_expert
- torch_npu.npu_dynamic_block_quant
- torch_npu.set_device_limit
- torch_npu.get_device_limit
- torch_npu.set_stream_limit
- torch_npu.reset_stream_limit
- torch_npu.get_stream_limit
- torch_npu.npu_sim_exponential_
-
- torch_npu.contrib接口列表
- (beta)torch_npu.contrib.npu_fused_attention_with_layernorm
- (beta)torch_npu.contrib.npu_fused_attention
- (beta)torch_npu.contrib.Prefetcher
- (beta)torch_npu.contrib.DCNv2
- (beta)torch_npu.contrib.BiLSTM
- (beta)torch_npu.contrib.Swish
- (beta)torch_npu.contrib.NpuFairseqDropout
- (beta)torch_npu.contrib.npu_giou
- (beta)torch_npu.contrib.npu_ptiou
- (beta)torch_npu.contrib.npu_iou
- (beta)torch_npu.contrib.function.fuse_add_softmax_dropout
- (beta)torch_npu.contrib.function.npu_diou
- (beta)torch_npu.contrib.function.npu_ciou
- (beta)torch_npu.contrib.module.NpuCachedDropout
- (beta)torch_npu.contrib.module.MultiheadAttention
- (beta)torch_npu.contrib.function.npu_single_level_responsible_flags
- (beta)torch_npu.contrib.function.npu_bbox_coder_encode_xyxy2xywh
- (beta)torch_npu.contrib.function.npu_fast_condition_index_put
- (beta)torch_npu.contrib.function.matmul_transpose
- (beta)torch_npu.contrib.function.npu_multiclass_nms
- (beta)torch_npu.contrib.function.npu_batched_multiclass_nms
- (beta)torch_npu.contrib.function.roll
- (beta)torch_npu.contrib.module.Mish
- (beta)torch_npu.contrib.module.SiLU
- (beta)torch_npu.contrib.module.ChannelShuffle
- (beta)torch_npu.contrib.module.LabelSmoothingCrossEntropy
- (beta)torch_npu.contrib.module.ModulatedDeformConv
- (beta)torch_npu.contrib.module.NpuDropPath
- (beta)torch_npu.contrib.module.Focus
- (beta)torch_npu.contrib.module.PSROIPool
- (beta)torch_npu.contrib.module.ROIAlign
- (beta)torch_npu.contrib.module.FusedColorJitter
- (beta)torch_npu.contrib.function.npu_bbox_coder_decode_xywh2xyxy
- (beta)torch_npu.contrib.function.npu_bbox_coder_encode_yolo
- (beta)torch_npu.contrib.module.npu_modules.DropoutWithByteMask
- (beta)torch_npu.contrib.function.dropout_with_byte_mask
- torch_npu.contrib.module.LinearA8W8Quant
- torch_npu.contrib.module.LinearQuant
- torch_npu.contrib.module.LinearWeightQuant
- torch_npu.contrib.module.QuantConv2d
-
- torch_npu.npu接口列表
- (beta)torch_npu.npu.get_npu_overflow_flag
- (beta)torch_npu.npu.clear_npu_overflow_flag
- torch_npu.npu.enable_deterministic_with_backward
- torch_npu.npu.disable_deterministic_with_backward
- torch_npu.npu.matmul.allow_hf32
- torch_npu.npu.conv.allow_hf32
- (beta)torch_npu.npu.set_option
- (beta)torch_npu.npu.config.allow_internal_format
- (beta)torch_npu.npu.stress_detect
- (beta)torch_npu.npu.stop_device
- (beta)torch_npu.npu.restart_device
- (beta)torch_npu.npu.check_uce_in_memory
- torch_npu.npu.SyncLaunchStream
- (beta)torch_npu.npu.utils.is_support_inf_nan
- (beta)torch_npu.npu.utils.npu_check_overflow
- (beta)torch_npu.npu.Event().recorded_time()
- (beta)torch_npu.npu.set_dump
- (beta)torch_npu.npu.init_dump
- (beta)torch_npu.npu.finalize_dump
- (beta)torch_npu.npu.set_compile_mode
- (beta)torch_npu.npu.is_jit_compile_false
- (beta)torch_npu.npu.set_mm_bmm_format_nd
- (beta)torch_npu.npu.get_mm_bmm_format_nd
- torch_npu.npu.ExternalEvent
- torch_npu.npu.ExternalEvent().record()
- torch_npu.npu.ExternalEvent().reset()
- torch_npu.npu.ExternalEvent().wait()
- torch_npu.npu.graph_task_group_begin
- torch_npu.npu.graph_task_group_end
- torch_npu.npu.graph_task_update_begin
- torch_npu.npu.graph_task_update_end
- (beta)torch_npu.npu.obfuscation_initialize
- (beta)torch_npu.npu.obfuscation_finalize
- (beta)torch_npu.npu.obfuscation_calculate
- (beta)torch_npu.npu.aclnn.version
- torch_npu.npu.aclnn.allow_hf32
- torch_npu.npu.set_deterministic_level
- torch_npu.npu.use_compatible_impl
- torch_npu.npu.are_compatible_impl_enabled
-
- torch_npu.optim接口列表
- torch_npu.optim.NpuFusedOptimizerBase
- torch_npu.optim.NpuFusedSGD
- torch_npu.optim.NpuFusedAdadelta
- torch_npu.optim.NpuFusedLamb
- torch_npu.optim.NpuFusedAdam
- torch_npu.optim.NpuFusedAdamW
- torch_npu.optim.NpuFusedAdamP
- torch_npu.optim.NpuFusedBertAdam
- torch_npu.optim.NpuFusedRMSprop
- torch_npu.optim.NpuFusedRMSpropTF
-
- torch_npu.profiler接口列表
- torch_npu.profiler.profile
- torch_npu.profiler._KinetoProfile
- torch_npu.profiler.ProfilerActivity
- torch_npu.profiler.tensorboard_trace_handler
- torch_npu.profiler.schedule
- torch_npu.profiler.ProfilerAction
- torch_npu.profiler._ExperimentalConfig
- torch_npu.profiler.ExportType
- torch_npu.profiler.ProfilerLevel
- torch_npu.profiler.AiCMetrics
- torch_npu.profiler.supported_activities
- torch_npu.profiler.supported_profiler_level
- torch_npu.profiler.supported_ai_core_metrics
- torch_npu.profiler.supported_export_type
- torch_npu.profiler.dynamic_profile.init
- torch_npu.profiler.dynamic_profile.step
- torch_npu.profiler.dynamic_profile.start
- torch_npu.profiler.profiler.analyse
- torch_npu.profiler.profile.enable_profiler_in_child_thread
- torch_npu.profiler.profile.disable_profiler_in_child_thread
-
- torch_npu.utils接口列表
- (beta)torch_npu.utils.save_async
- (beta)torch_npu.utils.npu_combine_tensors
- (beta)torch_npu.utils.get_part_combined_tensor
- (beta)torch_npu.utils.is_combined_tensor_valid
- (beta)torch_npu.utils.FlopsCounter
- torch_npu.utils.set_thread_affinity
- torch_npu.utils.reset_thread_affinity
- torch_npu.utils.get_cann_version
-
- Distributed接口列表
- (beta)torch.distributed.is_hccl_available
- torch.distributed.distributed_c10d._world.default_pg._get_backend(torch.device("npu")).get_hccl_comm_name
- (beta)torch.distributed.ProcessGroupHCCL
- (beta)torch_npu.distributed.reinit_process_group
- (beta)torch_npu.distributed.reduce_scatter_tensor_uneven
- (beta)torch_npu.distributed.all_gather_into_tensor_uneven
-
-
- C++接口列表
- (beta)torch_npu::init_npu
- (beta)torch_npu::finalize_npu
- (beta)torch::npu::synchronize
- (beta)c10::npu::current_device
- (beta)at::Device
- (beta)struct c10_npu::NPUEvent
- (beta)class at_npu::NPUGeneratorImpl
- (beta)at_npu::detail::getDefaultNPUGenerator
- (beta)at_npu::detail::createNPUGenerator
- (beta)class c10_npu::NPUStream
- (beta)c10_npu::getNPUStreamFromPool
- (beta)c10_npu::getDefaultNPUStream
- (beta)c10_npu::getCurrentNPUStream
- (beta)c10_npu::setCurrentNPUStream
- (beta)class at_npu::native::OpCommand
- (beta)struct c10_npu::NPUHooksInterface
- (beta)struct c10_npu::NPUHooksArgs
- (beta)c10_npu::device_count
- (beta)c10_npu::GetDevice
- (beta)c10_npu::SetDevice
- (beta)c10_npu::current_device
- (beta)c10_npu::set_device
- (beta)c10_npu::warning_state
- (beta)c10_npu::warn_or_error_on_sync
- (beta)at_npu::native::get_npu_format
- (beta)at_npu::native::get_npu_storage_sizes
- (beta)at_npu::native::npu_format_cast
- (beta)at_npu::native::empty_with_format
- (beta)c10_npu::c10_npu_get_error_message
- (beta)at_npu::native::npu_dropout_gen_mask
- at_npu::native::empty_with_swapped_memory
- c10_npu::NPUStreamGuard
- c10_npu::NPUStreamGuard::current_device
- c10_npu::NPUStreamGuard::current_stream
- c10_npu::NPUStreamGuard::NPUStreamGuard
- c10_npu::NPUStreamGuard::original_device
- c10_npu::NPUStreamGuard::original_stream
- c10_npu::NPUStreamGuard::reset_stream
- c10_npu::stream_synchronize
- c10d_npu::ProcessGroupHCCL
- c10d_npu::ProcessGroupHCCL::batch_isend_irecv