#include "op_plugin/AclOpsInterface.h"
#include "op_plugin/OpApiInterface.h"
#include "op_plugin/utils/op_api_common.h"
namespace op_api {
namespace {
std::tuple<c10::SmallVector<int64_t, SIZE>, c10::SmallVector<int64_t, SIZE>> nll_loss2d_npu_output_size(
const at::Tensor& self, const at::Tensor& target, int64_t reduction, int64_t ignore_index) {
c10::SmallVector<int64_t, SIZE> outputSize;
c10::SmallVector<int64_t, SIZE> totalWeightSize;
TORCH_CHECK(self.dim() >= 4, "self dim has to be more than or equal 4", OPS_ERROR(ErrCode::PARAM));
if (reduction == at::Reduction::None) {
outputSize = {self.size(0), self.size(2), self.size(3)};
}
return std::tuple<c10::SmallVector<int64_t, SIZE>, c10::SmallVector<int64_t, SIZE>>(outputSize, totalWeightSize);
}
}
std::tuple<at::Tensor&, at::Tensor&> nll_loss2d_forward_out(
const at::Tensor& self, const at::Tensor& target, const c10::optional<at::Tensor>& weight_opt, int64_t reduction,
int64_t ignore_index, at::Tensor& result, at::Tensor& total_weight) {
DO_COMPATIBILITY(aclnnNLLLoss2d, acl_op::nll_loss2d_forward_out(self, target, weight_opt, reduction,
ignore_index, result, total_weight));
at::Tensor weight_tensor = c10::value_or_else(weight_opt, [] { return at::Tensor(); });
if (!weight_tensor.defined()) {
weight_tensor = at::ones(self.size(1), self.options());
}
at_npu::native::OpPreparation::check_memory({self, target, weight_tensor}, {result, total_weight});
EXEC_NPU_CMD(aclnnNLLLoss2d, self, target, weight_tensor, reduction, ignore_index, result, total_weight);
return std::tuple<at::Tensor&, at::Tensor&>(result, total_weight);
}
std::tuple<at::Tensor, at::Tensor> nll_loss2d_forward(const at::Tensor& self,
const at::Tensor& target,
const c10::optional<at::Tensor>& weight_opt,
int64_t reduction, int64_t ignore_index) {
DO_COMPATIBILITY(aclnnNLLLoss2d,
acl_op::nll_loss2d_forward(self, target, weight_opt, reduction, ignore_index));
auto outputSizes = nll_loss2d_npu_output_size(self, target, reduction, ignore_index);
at::Tensor result = at_npu::native::OpPreparation::apply_tensor_without_format(self, std::get<0>(outputSizes));
at::Tensor total_weight = at_npu::native::OpPreparation::apply_tensor_without_format(self, std::get<1>(outputSizes));
nll_loss2d_forward_out(self, target, weight_opt, reduction, ignore_index, result,
total_weight);
return std::tuple<at::Tensor, at::Tensor>(result, total_weight);
}
}