/**
 * Copyright (c) 2026 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
/**
 * NOTE: Portions of this code were AI-generated and have been
 * technically reviewed for functional accuracy and security.
 */
/**
 * @file aclnn_population_count.cpp
 * @brief ACLNN L2 API implementation for PopulationCount
 */
#include "aclnn_population_count.h"
#include "population_count.h"
#include "aclnn_kernels/contiguous.h"
#include "aclnn_kernels/common/op_error_check.h"
#include "opdev/op_log.h"
#include "opdev/op_dfx.h"
#include "opdev/common_types.h"
#include "opdev/data_type_utils.h"
#include "opdev/make_op_executor.h"
// Names from namespace op (e.g. op::DataType, declared as such below) are used unqualified in this file.
using namespace op;
// Upper bound on tensor rank that CheckShape passes to OP_CHECK_MAX_DIM for both x and y.
#define ACLNN_MAX_SHAPE_RANK 8
static const std::initializer_list<op::DataType> X_DTYPE_SUPPORT_LIST = {
DataType::DT_INT16, DataType::DT_UINT16
};
/**
 * @brief Tell whether a data type is listed in X_DTYPE_SUPPORT_LIST.
 * @param dtype Data type to look up.
 * @return The result of CheckType for dtype against the support list.
 */
static bool IsXDtypeSupported(DataType dtype)
{
    const bool inSupportList = CheckType(dtype, X_DTYPE_SUPPORT_LIST);
    return inSupportList;
}
/**
 * @brief Verify that both tensor pointers are non-null.
 * @param x Input tensor pointer.
 * @param y Output tensor pointer.
 * @return true when neither OP_CHECK_NULL triggers its `return false` action.
 */
static bool CheckNotNull(const aclTensor* x, const aclTensor* y)
{
// x is checked before y, so a null x is the first one handled when both are null.
OP_CHECK_NULL(x, return false);
OP_CHECK_NULL(y, return false);
return true;
}
/**
 * @brief Validate the data types of the input and output tensors.
 *
 * x must have a type from X_DTYPE_SUPPORT_LIST (INT16 or UINT16) and y must be UINT8.
 * Each violation is logged with ACLNN_ERR_PARAM_INVALID before returning.
 *
 * @param x Input tensor; dereferenced, so it must already be known to be non-null.
 * @param y Output tensor; dereferenced, so it must already be known to be non-null.
 * @return true when both data types are acceptable, false otherwise.
 */
static bool CheckDtypeValid(const aclTensor* x, const aclTensor* y)
{
    const auto inputDtype = x->GetDataType();
    OP_CHECK(IsXDtypeSupported(inputDtype),
             OP_LOGE(ACLNN_ERR_PARAM_INVALID,
                     "PopulationCount: x dtype not supported: %d (must be INT16 or UINT16).",
                     static_cast<int>(inputDtype)),
             return false);

    // The output type is only inspected once the input type has been accepted.
    const auto outputDtype = y->GetDataType();
    OP_CHECK(outputDtype == DataType::DT_UINT8,
             OP_LOGE(ACLNN_ERR_PARAM_INVALID,
                     "PopulationCount: y dtype must be UINT8, got %d.",
                     static_cast<int>(outputDtype)),
             return false);

    return true;
}
/**
 * @brief Reject tensors whose storage format is reported as private by IsPrivateFormat.
 * @param x Input tensor; dereferenced, so it must already be known to be non-null.
 * @param y Output tensor; dereferenced, so it must already be known to be non-null.
 * @return false (after logging ACLNN_ERR_PARAM_INVALID) if either storage format is private, true otherwise.
 */
static bool CheckFormat(const aclTensor* x, const aclTensor* y)
{
    const auto inputFormat = x->GetStorageFormat();
    const auto outputFormat = y->GetStorageFormat();

    // Short-circuits exactly like the combined condition: y is only tested when x is not private.
    const bool anyPrivate = IsPrivateFormat(inputFormat) || IsPrivateFormat(outputFormat);
    OP_CHECK(!anyPrivate,
             OP_LOGE(ACLNN_ERR_PARAM_INVALID,
                     "PopulationCount: private format not supported. x=%d, y=%d",
                     static_cast<int>(inputFormat), static_cast<int>(outputFormat)),
             return false);

    return true;
}
/**
 * @brief Validate the view shapes of the input and output tensors.
 *
 * Both tensors are first passed through OP_CHECK_MAX_DIM with ACLNN_MAX_SHAPE_RANK.
 * Their view shapes must then have the same number of dimensions and the same
 * extent in every dimension; the first mismatch is logged with ACLNN_ERR_PARAM_INVALID.
 *
 * @param x Input tensor; dereferenced, so it must already be known to be non-null.
 * @param y Output tensor; dereferenced, so it must already be known to be non-null.
 * @return true when the shapes agree, false otherwise.
 */
static bool CheckShape(const aclTensor* x, const aclTensor* y)
{
    OP_CHECK_MAX_DIM(x, ACLNN_MAX_SHAPE_RANK, return false);
    OP_CHECK_MAX_DIM(y, ACLNN_MAX_SHAPE_RANK, return false);

    const auto inputShape = x->GetViewShape();
    const auto outputShape = y->GetViewShape();

    const auto inputRank = inputShape.GetDimNum();
    const auto outputRank = outputShape.GetDimNum();
    OP_CHECK(inputRank == outputRank,
             OP_LOGE(ACLNN_ERR_PARAM_INVALID,
                     "PopulationCount: x/y dim num mismatch: %zu vs %zu",
                     inputRank, outputRank),
             return false);

    // Ranks are equal here, so walking the input rank covers every dimension of both shapes.
    for (size_t dim = 0; dim < inputRank; ++dim) {
        const auto inputExtent = inputShape.GetDim(dim);
        const auto outputExtent = outputShape.GetDim(dim);
        OP_CHECK(inputExtent == outputExtent,
                 OP_LOGE(ACLNN_ERR_PARAM_INVALID,
                         "PopulationCount: x/y shape mismatch at dim %zu: %ld vs %ld",
                         dim, inputExtent, outputExtent),
                 return false);
    }

    return true;
}
/**
 * @brief Run all argument checks for PopulationCount in a fixed order.
 *
 * Order: null pointers, data types, storage formats, shapes.
 *
 * @param x Input tensor (may be null; that case is reported by the first check).
 * @param y Output tensor (may be null; that case is reported by the first check).
 * @return ACLNN_SUCCESS when every check passes; otherwise the status code given to the
 *         failing CHECK_COND (ACLNN_ERR_PARAM_NULLPTR for the null check,
 *         ACLNN_ERR_PARAM_INVALID for the others) — assuming CHECK_COND returns its
 *         second argument on failure; confirm against its definition.
 */
static aclnnStatus CheckParams(const aclTensor* x, const aclTensor* y)
{
// Must come first: every later check dereferences x and y.
CHECK_COND(CheckNotNull(x, y), ACLNN_ERR_PARAM_NULLPTR, "CheckNotNull failed");
CHECK_COND(CheckDtypeValid(x, y), ACLNN_ERR_PARAM_INVALID,
"CheckDtypeValid failed: x_dtype=%d, y_dtype=%d",
static_cast<int>(x->GetDataType()), static_cast<int>(y->GetDataType()));
CHECK_COND(CheckFormat(x, y), ACLNN_ERR_PARAM_INVALID,
"CheckFormat failed");
CHECK_COND(CheckShape(x, y), ACLNN_ERR_PARAM_INVALID,
"CheckShape failed");
return ACLNN_SUCCESS;
}
* @brief Two-phase L2 API (Phase 1): compute workspace + build executor.
*/
extern "C" aclnnStatus aclnnPopulationCountGetWorkspaceSize(
const aclTensor* x,
const aclTensor* y,
uint64_t* workspaceSize,
aclOpExecutor** executor)
{
L2_DFX_PHASE_1(aclnnPopulationCount, DFX_IN(x), DFX_OUT(y));
auto uniqueExecutor = CREATE_EXECUTOR();
CHECK_RET(uniqueExecutor.get() != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);
auto ret = CheckParams(x, y);
CHECK_RET(ret == ACLNN_SUCCESS, ret);
if (x->IsEmpty()) {
*workspaceSize = 0;
uniqueExecutor.ReleaseTo(executor);
return ACLNN_SUCCESS;
}
auto xContiguous = l0op::Contiguous(x, uniqueExecutor.get());
CHECK_RET(xContiguous != nullptr, ACLNN_ERR_INNER_NULLPTR);
const aclTensor* opResult = l0op::PopulationCount(xContiguous, uniqueExecutor.get());
CHECK_RET(opResult != nullptr, ACLNN_ERR_INNER_NULLPTR);
auto viewCopyResult = l0op::ViewCopy(opResult, y, uniqueExecutor.get());
CHECK_RET(viewCopyResult != nullptr, ACLNN_ERR_INNER_NULLPTR);
*workspaceSize = uniqueExecutor->GetWorkspaceSize();
uniqueExecutor.ReleaseTo(executor);
return ACLNN_SUCCESS;
}
* @brief Two-phase L2 API (Phase 2): launch kernel.
*/
extern "C" aclnnStatus aclnnPopulationCount(
void* workspace,
uint64_t workspaceSize,
aclOpExecutor* executor,
aclrtStream stream)
{
L2_DFX_PHASE_2(aclnnPopulationCount);
return CommonOpExecutorRun(workspace, workspaceSize, executor, stream);
}