/**
 * Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/**
 * The block above is our standard copyright notice; the note below is an additional remark.
 *
 * NOTE: Portions of this code were AI-generated and have been
 * technically reviewed for functional accuracy and security.
 */
/*!
 * \file reduce_nansum_arch32.cpp
 * \brief ReduceNansum kernel entry point (arch32 architecture)
 *
 * Template parameters (matching the ASCENDC_TPL_ARGS_DECL definition in reduce_nansum_tiling_key.h):
 * - D_T_X: data type, defined by ASCENDC_TPL_DATATYPE_DECL
 * - SCH_MODE: scheduling mode (0=AR full-load, 1=AR-ColSplit, 2=ARA full-load, 3=ARA-RowSplit)
 */
#include "common/reduce_nansum.h"
template <typename D_T_X, int SCH_MODE>
__global__ __aicore__ void reduce_nansum(GM_ADDR x, GM_ADDR axes, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling)
{
REGISTER_TILING_DEFAULT(ReduceNansumTilingData);
GET_TILING_DATA_WITH_STRUCT(ReduceNansumTilingData, tilingData, tiling);
if constexpr (SCH_MODE == 0) {
NsReduceNansum::ReduceNansumArFullload<D_T_X> op;
op.Init(x, y, &tilingData);
op.Process();
} else if constexpr (SCH_MODE == 1) {
NsReduceNansum::ReduceNansumArColsplit<D_T_X> op;
op.Init(x, y, &tilingData);
op.Process();
} else if constexpr (SCH_MODE == 2) {
NsReduceNansum::ReduceNansumAraFullload<D_T_X> op;
op.Init(x, y, &tilingData);
op.Process();
} else if constexpr (SCH_MODE == 3) {
NsReduceNansum::ReduceNansumAraRowsplit<D_T_X> op;
op.Init(x, y, &tilingData);
op.Process();
}
}