/*
* Copyright (c) 2024 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef TORCH_ATB_OPERATION_WRAPPER_H
#define TORCH_ATB_OPERATION_WRAPPER_H
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wfloat-equal"
#include <torch/torch.h>
#include <torch/extension.h>
#include <torch_npu/csrc/framework/OpCommand.h>
#pragma GCC diagnostic pop
#include "atb/atb_infer.h"
namespace TorchAtb {
class OperationWrapper {
public:
OperationWrapper(const OperationWrapper &other) = delete;
OperationWrapper &operator=(const OperationWrapper &other) = delete;
OperationWrapper(OperationWrapper &&other) noexcept : operation_(std::move(other.operation_)){};
OperationWrapper &operator=(OperationWrapper &&other) noexcept;
~OperationWrapper() = default;
explicit OperationWrapper(const atb::infer::LayerNormParam ¶m);
explicit OperationWrapper(const atb::infer::ElewiseParam ¶m);
explicit OperationWrapper(const atb::infer::LinearParam ¶m);
explicit OperationWrapper(const atb::infer::SoftmaxParam ¶m);
explicit OperationWrapper(const atb::infer::SelfAttentionParam ¶m);
explicit OperationWrapper(const atb::infer::PagedAttentionParam ¶m);
explicit OperationWrapper(const atb::infer::RopeParam ¶m);
explicit OperationWrapper(const atb::infer::SplitParam ¶m);
explicit OperationWrapper(const atb::infer::GatherParam ¶m);
explicit OperationWrapper(const atb::infer::ActivationParam ¶m);
explicit OperationWrapper(const atb::infer::RmsNormParam ¶m);
explicit OperationWrapper(const atb::infer::AllGatherParam ¶m);
explicit OperationWrapper(const atb::infer::AsStridedParam ¶m);
explicit OperationWrapper(const atb::infer::CumsumParam ¶m);
explicit OperationWrapper(const atb::infer::DynamicNTKParam ¶m);
explicit OperationWrapper(const atb::infer::MultinomialParam ¶m);
explicit OperationWrapper(const atb::infer::ConcatParam ¶m);
explicit OperationWrapper(const atb::infer::SliceParam ¶m);
explicit OperationWrapper(const atb::infer::TransposeParam ¶m);
explicit OperationWrapper(const atb::infer::GatingParam ¶m);
explicit OperationWrapper(const atb::infer::ReshapeAndCacheParam ¶m);
explicit OperationWrapper(const atb::infer::FillParam ¶m);
explicit OperationWrapper(const atb::infer::RazorFusionAttentionParam ¶m);
explicit OperationWrapper(const atb::infer::AllReduceParam ¶m);
explicit OperationWrapper(const atb::infer::BroadcastParam ¶m);
explicit OperationWrapper(const atb::infer::ReduceScatterParam ¶m);
explicit OperationWrapper(const atb::infer::ReduceScatterVParam ¶m);
explicit OperationWrapper(const atb::infer::FaUpdateParam ¶m);
explicit OperationWrapper(const atb::infer::LinearParallelParam ¶m);
explicit OperationWrapper(const atb::infer::LinearSparseParam ¶m);
explicit OperationWrapper(const atb::infer::RelayAttentionParam ¶m);
explicit OperationWrapper(const atb::infer::TopkToppSamplingParam ¶m);
explicit OperationWrapper(const atb::infer::AllToAllParam ¶m);
explicit OperationWrapper(const atb::GraphParam ¶m);
atb::Operation *ReleaseOperation();
std::string GetName() const;
uint32_t GetInputNum() const;
uint32_t GetOutputNum() const;
std::vector<torch::Tensor> Forward(std::vector<torch::Tensor> &inTensors);
private:
template <typename OpParam> void CreateOpUniquePtr(const OpParam ¶m);
atb::SVector<atb::TensorDesc> InferShape();
void Setup(std::vector<torch::Tensor> &inTensors, std::vector<torch::Tensor> &outTensors);
void Execute();
void BuildInTensorVariantPack(std::vector<torch::Tensor> &inTensors);
void BuildOutTensorVariantPack();
private:
std::unique_ptr<atb::Operation> operation_;
atb::VariantPack variantPack_;
uint64_t workspaceSize_{0};
};
}
#endif