* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file sort_apt.cpp
* \brief
*/
#include "kernel_tiling/kernel_tiling.h"
#include "basic_api/kernel_vec_intf.h"
#include "arch35/sort_tiling_key.h"
#include "arch35/sort_tiling_data.h"
#include "arch35/sort_radix_sort_more_core.h"
#include "arch35/sort_radix_sort_one_core.h"
#include "arch35/sort_merge_sort.h"
#include "arch35/merge_sort_big_size.h"
#include "arch35/sort_merge_intra_core.h"
#include "arch35/sort_small_axis_insertion.h"
#include "arch35/sort_small_axis_two_stage.h"
#include "arch35/sort_axis_one_copy.h"
using namespace AscendC;
using namespace Sort;
template <uint64_t schId, uint64_t isInt32, uint64_t isDescend>
__global__ __aicore__ void sort(
GM_ADDR x, GM_ADDR y1, GM_ADDR y2, GM_ADDR workspace, GM_ADDR tiling)
{
REGISTER_TILING_DEFAULT(SortRegBaseTilingData);
GET_TILING_DATA_WITH_STRUCT(SortRegBaseTilingData, tilingData, tiling);
GM_ADDR usrWorkspace = AscendC::GetUserWorkspace(workspace);
KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIV_1_0);
TPipe pipe;
constexpr bool isDescending = (isDescend != 0);
if constexpr (schId == 7) {
Sort::SortAxisOneCopy<DTYPE_X, DTYPE_Y2> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
return;
}
if constexpr (schId == 2) {
if constexpr(sizeof(DTYPE_X) == 1) {
if constexpr(isInt32 == 1) {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint8_t, uint32_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint8_t, int64_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
}
if constexpr(sizeof(DTYPE_X) == 2) {
if constexpr(isInt32 == 1) {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint16_t,uint32_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint16_t,int64_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
}
if constexpr(sizeof(DTYPE_X) == 4) {
if constexpr(isInt32 == 1) {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint32_t,uint32_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint32_t,int64_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
}
if constexpr(sizeof(DTYPE_X) == 8) {
if constexpr(isInt32 == 1) {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint64_t, uint32_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else {
SortRadixMoreCore<DTYPE_X, DTYPE_Y2, uint64_t, int64_t,isDescend> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
}
return;
}
if constexpr (schId == 1) {
SortRadixOneCore<DTYPE_X, DTYPE_Y2, isDescending> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
return;
}
if constexpr (schId == 0 || schId == 8) {
constexpr uint64_t isSort32SmallAxis = (schId == 8);
if constexpr (IsSameType<bfloat16_t, DTYPE_X>::value) {
MergeSort<DTYPE_X, DTYPE_Y2, float, isDescend, isSort32SmallAxis> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else if constexpr (IsSameType<float, DTYPE_X>::value || IsSameType<half, DTYPE_X>::value) {
MergeSort<DTYPE_X, DTYPE_Y2, DTYPE_X, isDescend, isSort32SmallAxis> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
return;
}
if constexpr (schId == 3) {
if constexpr (IsSameType<DTYPE_X, bfloat16_t>::value) {
MergeSortBigSize<DTYPE_X, float, isDescending, DTYPE_Y2> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
if constexpr (IsSameType<float, DTYPE_X>::value || IsSameType<half, DTYPE_X>::value) {
MergeSortBigSize<DTYPE_X, DTYPE_X, isDescending, DTYPE_Y2> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
return;
}
if constexpr (schId == 4) {
if constexpr (IsSameType<float, DTYPE_X>::value) {
Sort::SortMergeIntraCore<DTYPE_X, DTYPE_Y2, isDescending> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
return;
}
if constexpr (schId == 5) {
if constexpr (IsSameType<bfloat16_t, DTYPE_X>::value) {
Sort::SortSmallAxisInsertion<DTYPE_X, float, DTYPE_Y2, isDescending> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
} else {
Sort::SortSmallAxisInsertion<DTYPE_X, DTYPE_X, DTYPE_Y2, isDescending> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
}
return;
}
if constexpr (schId == 6) {
Sort::SortSmallAxisTwoStage<DTYPE_X, DTYPE_Y2, isDescending> op;
op.Init(x, y1, y2, usrWorkspace, &tilingData, &pipe);
op.Process();
return;
}
}