#include "csrc/OpApiCommon.h"
#include "csrc/functions.h"
namespace {
constexpr int64_t MAX_INCHANNEL = 1024;
const int64_t DIVISOR = 8;
const size_t INITIAL_CAPACITY = 2;
}
at::Tensor npu_sparse_inverse_conv3d(const at::Tensor& features, const at::Tensor& origin_indices,
const at::Tensor& unique_indices_offset, const at::Tensor& sorted_idx_to_former_indices,
at::IntArrayRef kernel_size, int in_channel)
{
TORCH_CHECK_NPU(features);
TORCH_CHECK_NPU(origin_indices);
TORCH_CHECK_NPU(unique_indices_offset);
TORCH_CHECK_NPU(sorted_idx_to_former_indices);
TORCH_CHECK(in_channel <= MAX_INCHANNEL,
"in_channel must less or equal than 1024 expected but got in_channel: ", in_channel);
TORCH_CHECK(in_channel % DIVISOR == 0, "in_channel must be divisible by 8 but got in_channel: ", in_channel);
int64_t kernel_sum = 1;
for (int32_t i = 0; i < static_cast<int32_t>(kernel_size.size()); i++) {
kernel_sum *= kernel_size[i];
}
c10::SmallVector<int64_t, INITIAL_CAPACITY> output_size = {origin_indices.sizes()[0], kernel_sum * in_channel};
at::Tensor output_img2col = at::zeros(output_size, features.options());
EXEC_NPU_CMD(aclnnSparseInverseConv3d, features, origin_indices, unique_indices_offset,
sorted_idx_to_former_indices, kernel_size, in_channel, output_img2col);
return output_img2col;
}