#include "op_plugin/AclOpsInterface.h"
#include "op_plugin/utils/OpAdapter.h"
namespace acl_op {
using npu_preparation = at_npu::native::OpPreparation;

/**
 * @brief Launches the "AttentionLnQKV" NPU operator and returns its six outputs.
 *
 * NOTE(review): judging by the operator and argument names this fuses a layer
 * normalisation of @p x with the query/key/value projections; the kernel itself
 * is not visible from this file, so confirm against the operator specification.
 *
 * @param x            Input tensor with at least two dimensions. x.size(0) must be
 *                     divisible by @p seq_len and x.size(1) by @p num_heads.
 * @param kernel_query Forwarded to the operator; also used as the template tensor
 *                     when allocating the query, mean and variance outputs.
 * @param kernel_key   Forwarded to the operator; template tensor for the key output.
 * @param kernel_value Forwarded to the operator; template tensor for the value output.
 * @param gamma        Forwarded to the operator unchanged.
 * @param beta         Forwarded to the operator unchanged.
 * @param bias_query   Optional; a default-constructed at::Tensor is passed when absent.
 * @param bias_key     Optional; a default-constructed at::Tensor is passed when absent.
 * @param bias_value   Optional; a default-constructed at::Tensor is passed when absent.
 * @param seq_len      Must be greater than 0.
 * @param num_heads    Must be greater than 0.
 * @param eps          Narrowed to float and passed as the "epsilon" attribute.
 *
 * @return {norm, query_output, key_output, value_output, mean, variance}, where
 *         norm is allocated from @p x, the query/key/value outputs have shape
 *         {x.size(0) / seq_len, num_heads, seq_len, x.size(1) / num_heads}, and
 *         mean/variance have shape {x.size(0)} in ACL_FORMAT_ND.
 *
 * @throws c10::Error (via TORCH_CHECK) when any of the argument checks fails.
 */
std::vector<at::Tensor> npu_fused_attention_layernorm_qkv_fwd(
    const at::Tensor &x, const at::Tensor &kernel_query, const at::Tensor &kernel_key, const at::Tensor &kernel_value,
    const at::Tensor &gamma, const at::Tensor &beta, const c10::optional<at::Tensor> &bias_query,
    const c10::optional<at::Tensor> &bias_key, const c10::optional<at::Tensor> &bias_value, int64_t seq_len,
    int64_t num_heads, double eps)
{
    // Both values are used as divisors and as output dimensions below, so zero
    // and negative values are rejected up front.
    TORCH_CHECK(seq_len > 0 && num_heads > 0,
                "seq_len and num_heads must be greater than 0, but got seq_len = ", seq_len,
                ", num_heads = ", num_heads, ".",
                OPS_ERROR(ErrCode::VALUE));
    // size(0) and size(1) are read below, so x needs at least two dimensions.
    TORCH_CHECK(x.dim() >= 2, "x must be at least 2 dimensions, but got ", x.dim(),
                OPS_ERROR(ErrCode::PARAM));
    TORCH_CHECK(x.size(0) % seq_len == 0 && x.size(1) % num_heads == 0,
                "In npu_fused_attention_layernorm_qkv_fwd, x.size(0) should be "
                "divisible by seq_len "
                "and x.size(1) should be divisible by num_heads.",
                OPS_ERROR(ErrCode::PARAM));

    // Absent optional biases are handed to the operator as default-constructed tensors.
    const at::Tensor &bias_query_input = c10::value_or_else(bias_query, [] { return at::Tensor(); });
    const at::Tensor &bias_key_input = c10::value_or_else(bias_key, [] { return at::Tensor(); });
    const at::Tensor &bias_value_input = c10::value_or_else(bias_value, [] { return at::Tensor(); });

    c10::SmallVector<int64_t, SIZE> qkv_output_shape = {x.size(0) / seq_len, num_heads, seq_len, x.size(1) / num_heads};
    c10::SmallVector<int64_t, SIZE> mean_output_shape = {x.size(0)};

    // Output buffers: norm is allocated from x, each projection from its own kernel tensor.
    at::Tensor norm = npu_preparation::apply_tensor(x);
    at::Tensor query_output = npu_preparation::apply_tensor(kernel_query, qkv_output_shape);
    at::Tensor key_output = npu_preparation::apply_tensor(kernel_key, qkv_output_shape);
    at::Tensor value_output = npu_preparation::apply_tensor(kernel_value, qkv_output_shape);
    // mean/variance are allocated from kernel_query with an explicit ND format.
    at::Tensor mean = npu_preparation::apply_tensor_with_format(kernel_query, mean_output_shape, ACL_FORMAT_ND);
    at::Tensor variance = npu_preparation::apply_tensor_with_format(kernel_query, mean_output_shape, ACL_FORMAT_ND);

    // Input/output order is kept exactly as registered here; do not reorder.
    at_npu::native::OpCommand cmd;
    cmd.Name("AttentionLnQKV")
        .Input(x)
        .Input(kernel_query)
        .Input(kernel_key)
        .Input(kernel_value)
        .Input(gamma)
        .Input(beta)
        .Input(bias_query_input)
        .Input(bias_key_input)
        .Input(bias_value_input)
        .Output(norm)
        .Output(query_output)
        .Output(key_output)
        .Output(value_output)
        .Output(mean)
        .Output(variance)
        .Attr("epsilon", static_cast<float>(eps))
        .Run();

    std::vector<at::Tensor> results = {norm, query_output, key_output, value_output, mean, variance};
    return results;
}
} // namespace acl_op