#include <c10/util/Exception.h>

#include "op_plugin/AclOpsInterface.h"
#include "op_plugin/utils/OpAdapter.h"
namespace acl_op {
using npu_preparation = at_npu::native::OpPreparation;
using npu_utils = at_npu::native::NpuUtils;
namespace {
/**
 * Computes the shape of the tensor produced by stacking `tensors` along `dim`.
 *
 * The result is the first tensor's shape with one additional dimension of
 * length `tensors.size()` inserted at index `dim`. Only `tensors[0]` is
 * consulted for the shape; the remaining tensors are not inspected here.
 *
 * @param tensors  Tensors to be stacked; must contain at least one element.
 * @param dim      Position of the new dimension. It is normalised through
 *                 make_warp_dim against `tensors[0].dim() + 1`
 *                 (presumably wrapping negative values -- confirm in the helper).
 * @return         The output shape.
 */
at::SmallVector<int64_t, SIZE> stack_npu_output_size(at::TensorList tensors, int64_t dim)
{
    // tensors[0] is dereferenced below, so an empty list must be rejected
    // before it turns into an out-of-bounds read.
    TORCH_CHECK(!tensors.empty(), "stack expects a non-empty TensorList");

    const at::Tensor &first = tensors[0];
    const int64_t input_rank = first.dim();
    dim = op_plugin::utils::make_warp_dim(dim, input_rank + 1);

    at::SmallVector<int64_t, SIZE> shape;
    // Leading dimensions, copied unchanged from the first tensor.
    for (int64_t i = 0; i < dim; i++) {
        shape.emplace_back(first.size(i));
    }
    // The newly inserted dimension: one slot per stacked tensor.
    shape.emplace_back(static_cast<int64_t>(tensors.size()));
    // Trailing dimensions, shifted one position to the right.
    for (int64_t i = dim; i < input_rank; i++) {
        shape.emplace_back(first.size(i));
    }
    return shape;
}
/**
 * Runs the "Pack" operator over `tensors`, writing into `result`.
 *
 * Each tensor is registered as an input named "x<i>" (x0, x1, ...). The
 * operator receives the attributes "N" (number of inputs) and "axis" (`dim`,
 * forwarded as given). No validation of `result` or of the inputs is done here.
 *
 * @param result   Output tensor handed to the operator.
 * @param tensors  Input tensors, registered in order.
 * @param dim      Value passed as the "axis" attribute.
 * @return         `result`, by reference.
 */
at::Tensor &stack_out_nocheck(at::Tensor &result, at::TensorList tensors, int64_t dim)
{
    const size_t tensor_count = tensors.size();

    at_npu::native::OpCommand cmd;
    cmd.Name("Pack");
    // Inputs are read straight from the list; no intermediate copy of the
    // tensor handles is needed to register them.
    for (size_t i = 0; i < tensor_count; i++) {
        const std::string input_name = "x" + std::to_string(i);
        cmd.Input(tensors[i], input_name);
    }
    cmd.Output(result)
        .Attr("N", static_cast<int64_t>(tensor_count))
        .Attr("axis", dim)
        .Run();
    return result;
}
}
/**
 * Out-variant of stack: stacks `tensors` along `dim` into `out`.
 *
 * `out` is first passed through npu_preparation::CheckOut together with the
 * ND format, the first tensor's scalar type and the computed output shape.
 * When npu_utils::check_match accepts `out`, the operator writes into it
 * directly. Otherwise the operator writes into the tensor returned by
 * npu_utils::format_contiguous(out), and npu_utils::format_fresh_view is then
 * called with `out` and that tensor.
 *
 * @param tensors  Tensors to stack.
 * @param dim      Position of the new dimension.
 * @param out      Destination tensor.
 * @return         `out`, by reference.
 */
at::Tensor &stack_out(at::TensorList tensors, int64_t dim, at::Tensor &out)
{
    auto expected_shape = stack_npu_output_size(tensors, dim);
    npu_preparation::CheckOut({tensors[0]}, out, ACL_FORMAT_ND, tensors[0].scalar_type(), expected_shape);

    // Fast path: the operator can target `out` as it is.
    if (npu_utils::check_match(&out)) {
        stack_out_nocheck(out, tensors, dim);
        return out;
    }

    // Otherwise compute into a contiguous stand-in and refresh `out` from it.
    at::Tensor staging = npu_utils::format_contiguous(out);
    stack_out_nocheck(staging, tensors, dim);
    npu_utils::format_fresh_view(out, staging);
    return out;
}
/**
 * Stacks `tensors` along a new dimension at `dim` and returns the result.
 *
 * The output is allocated through npu_preparation::apply_tensor_with_format
 * using the computed output shape, the first tensor's options and the ND
 * format, and is then filled by stack_out_nocheck.
 *
 * @param tensors  Tensors to stack.
 * @param dim      Position of the new dimension.
 * @return         The newly allocated stacked tensor.
 */
at::Tensor stack(at::TensorList tensors, int64_t dim)
{
    auto stacked_shape = stack_npu_output_size(tensors, dim);
    const auto first_options = tensors[0].options();

    at::Tensor stacked = npu_preparation::apply_tensor_with_format(stacked_shape, first_options, ACL_FORMAT_ND);
    stack_out_nocheck(stacked, tensors, dim);
    return stacked;
}
}