* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "../demo_util.h"
// Device index handed to aclrtSetDevice in main.
constexpr int32_t DEVICE_ID = 0;

// Shape of the "lse" input tensor: {LES_DIM_0, LES_DIM_1}.
// NOTE(review): "LES" looks like a transposition of "LSE" - confirm before renaming.
constexpr uint32_t LES_DIM_0 = 8;
constexpr uint32_t LES_DIM_1 = 16384;

// First two dimensions of the "localout" input tensor; LOCALOUT_DIM_1 is also the first
// dimension of the output tensor.
constexpr uint32_t LOCALOUT_DIM_0 = 8;
constexpr uint32_t LOCALOUT_DIM_1 = 16384;

// Last dimension of both the "localout" input tensor and the output tensor.
constexpr uint32_t HEAD_SIZE = 128;

// Value assigned to atb::infer::FaUpdateParam::sp.
constexpr uint32_t SP_PARA_DEGREE = 8;
* @brief 准备atb::VariantPack
* @param contextPtr context指针
* @param stream stream
* @param inTensors atb::VariantPack中的输入tensor
* @return atb::Status 错误码
*/
atb::Status PrepareInTensor(atb::Context *contextPtr, aclrtStream stream, atb::SVector<atb::Tensor> &inTensors)
{
atb::Tensor lse;
CHECK_STATUS(CreateTensorFromVector(contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT,
aclFormat::ACL_FORMAT_ND, {LES_DIM_0, LES_DIM_1}, lse));
atb::Tensor localout;
CHECK_STATUS(CreateTensorFromVector(contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT,
aclFormat::ACL_FORMAT_ND, {LOCALOUT_DIM_0, LOCALOUT_DIM_1, HEAD_SIZE},
localout));
inTensors = {lse, localout};
return atb::ErrorType::NO_ERROR;
}
* @brief 创建一个faupdate operation
* @param faupdateOp 创建一个Operation指针
* @return atb::Status 错误码
*/
atb::Status CreateFaUpdateOperation(atb::Operation **faupdateOp)
{
atb::infer::FaUpdateParam param;
param.faUpdateType = atb::infer::FaUpdateParam::FaUpdateType::DECODE_UPDATE;
param.sp = SP_PARA_DEGREE;
CHECK_STATUS(atb::CreateOperation(param, faupdateOp));
return atb::ErrorType::NO_ERROR;
}
int main(int argc, char **argv)
{
atb::Context *context = nullptr;
void *stream = nullptr;
CHECK_STATUS(aclInit(nullptr));
CHECK_STATUS(aclrtSetDevice(DEVICE_ID));
CHECK_STATUS(atb::CreateContext(&context));
CHECK_STATUS(aclrtCreateStream(&stream));
context->SetExecuteStream(stream);
atb::Operation *faupdateOp = nullptr;
CHECK_STATUS(CreateFaUpdateOperation(&faupdateOp));
atb::VariantPack variantPack;
CHECK_STATUS(PrepareInTensor(context, stream, variantPack.inTensors));
atb::Tensor output;
CHECK_STATUS(CreateTensor(ACL_FLOAT, aclFormat::ACL_FORMAT_ND, {LOCALOUT_DIM_1, HEAD_SIZE}, output));
variantPack.outTensors = {output};
uint64_t workspaceSize = 0;
CHECK_STATUS(faupdateOp->Setup(variantPack, workspaceSize, context));
uint8_t *workspacePtr = nullptr;
if (workspaceSize > 0) {
CHECK_STATUS(aclrtMalloc((void **)(&workspacePtr), workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST));
}
CHECK_STATUS(faupdateOp->Execute(variantPack, workspacePtr, workspaceSize, context));
CHECK_STATUS(aclrtSynchronizeStream(stream));
for (atb::Tensor &inTensor : variantPack.inTensors) {
CHECK_STATUS(aclrtFree(inTensor.deviceData));
}
for (atb::Tensor &outTensor : variantPack.outTensors) {
CHECK_STATUS(aclrtFree(outTensor.deviceData));
}
if (workspaceSize > 0) {
CHECK_STATUS(aclrtFree(workspacePtr));
}
CHECK_STATUS(atb::DestroyOperation(faupdateOp));
CHECK_STATUS(aclrtDestroyStream(stream));
CHECK_STATUS(DestroyContext(context));
CHECK_STATUS(aclFinalize());
std::cout << "faupdate demo success!" << std::endl;
return 0;
}