* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "gtest/gtest.h"
#include <array>
#include <iostream>
#include <memory>
#include <stdlib.h>
#include "acl/acl.h"
#include "aclnn/acl_meta.h"
#include "aclnn/aclnn_base.h"
#include "kernel_mgr.h"
#include "memset_ctx_holder.h"
#include "opdev/make_op_executor.h"
#include "opdev/op_def.h"
#include "opdev/op_dfx.h"
#include "opdev/op_cache.h"
#include "op_cache_internal.h"
#include "opdev/op_errno.h"
#include "register/op_impl_registry.h"
#include "rts_arg.h"
#include "opdev/aicpu/aicpu_task.h"
#include "thread_local_context.h"
#include "kernel_workspace.h"
#include "kernel_launcher.h"
#include "depends/platform/platform_stub.h"
#include "runtime/runtime/rts/rts_kernel.h"
#include "test_comp_op_common.h"
using namespace op::internal;
using namespace op::internal::test;
// Forward declarations of symbols that are defined outside this file and used by the tests below.
// Op type id accessors (SortOpTypeId() is called by CreateHashGetCacheSuccessTest1).
extern inline uint32_t SortOpTypeId();
extern inline uint32_t GeluOpTypeId();
// Maps an executor pointer to its cache wrapper; the tests pass it a reinterpret_cast'ed cache wrap.
extern op::internal::OpExecCacheWrap* GetOpExecCacheFromExecutor(aclOpExecutor* executor);
// C-linkage PTA cache entry points exercised directly by the tests.
extern "C" aclOpExecutor* PTAFindExecCache(uint8_t* buf, size_t len, uint64_t* workspaceSize);
extern "C" aclOpExecutor* PTAGetExecCache(uint64_t hash, uint64_t* workspaceSize);
extern "C" void InitPTACacheThreadLocal();
extern "C" void UnInitPTACacheThreadLocal();
extern "C" void ResetCacheThreadLocal();
extern "C" bool CanUsePTACache(const char* api);
extern "C" void AddTensorAddrToCachedList(void* addr);
extern "C" void SetPTAHashKey(uint64_t hash);
extern "C" void SetPTACacheHashKey(uint8_t* key, size_t len);
// Fixture shared by every op-cache test in this file; the suite-level hooks are intentionally empty.
class OpCacheUt : public testing::Test {
protected:
    static void SetUpTestCase()
    {
    }

    static void TearDownTestCase()
    {
    }
};
// Test stub with the signature of a "load binary from file" runtime call.
// binPath and optionalCfg are ignored; *handle receives a non-null placeholder and
// RT_ERROR_NONE is always returned.
aclError aclrtBinaryLoadFromFileInvoked(const char_t* const binPath, const rtLoadBinaryConfig_t* const optionalCfg,
                                        rtBinHandle* handle)
{
    // Static storage: the address written to *handle must stay valid after this function returns.
    // A plain local would leave the caller holding a dangling pointer.
    static rtBinHandle handle_tmp = nullptr;
    *handle = &handle_tmp;
    return RT_ERROR_NONE;
}
// Test stub with the signature of a "get function by name" runtime call.
// binHandle and kernelName are ignored; *funcHandle receives a non-null placeholder and
// RT_ERROR_NONE is always returned.
aclError aclrtFuncGetByNameInvoked(const rtBinHandle binHandle, const char_t* kernelName, rtFuncHandle* funcHandle)
{
    // Static storage: the address written to *funcHandle must stay valid after this function returns.
    // A plain local would leave the caller holding a dangling pointer.
    static rtFuncHandle funcHandle_tmp = nullptr;
    *funcHandle = &funcHandle_tmp;
    return RT_ERROR_NONE;
}
// Serializes a mix of tensors, tensor lists, scalars, scalar lists, arrays, null pointers and plain
// values into an OpCacheKey, then checks the key against a fixed hex dump and a fixed length.
// Finally verifies that no cache entry can be found for that key.
TEST_F(OpCacheUt, CreateHashTest)
{
    op::Shape tShape{1, 2, 3};
    aclTensor t1(tShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor t2(tShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor t3(tShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    const aclTensor* tensorPtr1 = &t1;
    aclTensor* tensorPtr2 = &t2;

    // The list deliberately carries a trailing null entry.
    aclTensor* list2[] = {&t1, &t2, &t3, nullptr};
    aclTensorList tensorList(list2, 4);
    const aclTensorList* tensorListPtr = &tensorList;
    aclTensorList* tensorList2Ptr = &tensorList;

    bool boolValue[] = {true, false, true};
    aclBoolArray array1(boolValue, 3);
    const aclBoolArray* array1Ptr = &array1;
    float floatValue[] = {1.1, 2.2, 3.3};
    aclFloatArray array2(floatValue, 3);
    const aclFloatArray* array2Ptr = &array2;
    int64_t intValue[] = {1, 2, 3};
    aclIntArray array3(intValue, 3);
    const aclIntArray* array3Ptr = &array3;
    float value = 3.1;
    std::vector<op::fp16_t> values(3, value);
    aclFp16Array array4(values.data(), 3);
    const aclFp16Array* array4Ptr = &array4;

    float fpValue = 3.2;
    float fpValue2 = 3.3;
    aclScalar scalar1(&fpValue, op::DataType::DT_FLOAT);
    aclScalar scalar2(&fpValue2, op::DataType::DT_FLOAT);
    aclScalar scalar3(&fpValue2, op::DataType::DT_FLOAT);
    const aclScalar* scalarPtr = &scalar1;
    aclScalar* list3[] = {&scalar1, &scalar2, &scalar3, nullptr};
    aclScalarList scalarList(list3, 4);
    aclScalarList* scalarListPtr1 = &scalarList;
    const aclScalarList* scalarListPtr2 = &scalarList;

    std::string apiName = "aclnnClamp";
    const aclDataType dataType = aclDataType::ACL_FLOAT;

    // Null pointers of every supported container type are part of the hashed parameter set.
    const aclBoolArray* arrayNull1 = nullptr;
    const aclFloatArray* arrayNull2 = nullptr;
    const aclIntArray* arrayNull3 = nullptr;
    const aclFp16Array* arrayNull4 = nullptr;
    const aclTensorList* tensorListNull1 = nullptr;
    aclTensorList* tensorListNull2 = nullptr;
    const aclScalarList* scalarListNull1 = nullptr;
    aclScalarList* scalarListNull2 = nullptr;
    const aclScalar* scalarNull1 = nullptr;
    int dim = 0;
    bool keepdim = true;

    auto in0 = std::make_tuple(tensorPtr1, tensorListPtr, scalarPtr, scalarListPtr1, array1Ptr, array2Ptr,
                               tensorList2Ptr, arrayNull1, arrayNull2, arrayNull3, dim);
    auto out0 = std::make_tuple(array3Ptr, array4Ptr, tensorPtr2, scalarListPtr2, dataType, arrayNull4, tensorListNull1,
                                tensorListNull2, scalarListNull1, scalarListNull2, scalarNull1, keepdim);

    OpCacheKey key;
    UnInitPTACacheThreadLocal();
    InitExecutorCacheThreadLocal();
    AddParamToBuf(apiName);
    AddOpConfigInfoToBuf();
    CalculateHashKey(in0);
    CalculateHashKey(out0);
    AddSeperator();
    SetOpCacheKey(key);

    // Expected hex rendering of the key buffer. Declared const: a string literal must not bind to char*.
    const char* hashBuf =
        "0x61636c6e6e436c616d700000000000000000000100000000000000020000000000000003000000000000002c06000000000000000300"
        "00000000000001000000000000002c0100000000000000020000000000000003000000000000002c030000002c00000000000000002c02"
        "0000000100000000000000020000000000000003000000000000002c0600000000000000030000000000000001000000000000002c0100"
        "000000000000020000000000000003000000000000002c030000002c00000000000000002c020000000100000000000000020000000000"
        "000003000000000000002c0600000000000000030000000000000001000000000000002c01000000000000000200000000000000030000"
        "00000000002c030000002c00000000000000002c020000000100000000000000020000000000000003000000000000002c060000000000"
        "0000030000000000000001000000000000002c0100000000000000020000000000000003000000000000002c030000002c000000000000"
        "00002c020000002c0400000000000000cdcc4c402c000000002ccdcc4c402c000000002c333353402c000000002c333353402c00000000"
        "2c2c04000000000000000100010300000000000000cdcc8c3fcdcc0c403333534003000000000000000100000000000000020000000000"
        "000003000000000000002c0600000000000000030000000000000001000000000000002c01000000000000000200000000000000030000"
        "00000000002c030000002c00000000000000002c020000000100000000000000020000000000000003000000000000002c060000000000"
        "0000030000000000000001000000000000002c0100000000000000020000000000000003000000000000002c030000002c000000000000"
        "00002c020000000100000000000000020000000000000003000000000000002c0600000000000000030000000000000001000000000000"
        "002c0100000000000000020000000000000003000000000000002c030000002c00000000000000002c020000002c04000000000000002c"
        "2c2c0000000001000000000000000200000000000000030000000000000003000000000000003342334233420300000000000000010000"
        "0000000000020000000000000003000000000000002c0600000000000000030000000000000001000000000000002c0100000000000000"
        "020000000000000003000000000000002c030000002c00000000000000002c02000000cdcc4c402c000000002c333353402c000000002c"
        "333353402c000000002c2c0400000000000000000000002c2c2c2c2c2c012c000000000000000000000000000000000100000000000000"
        "02000000000000000000000000000000010000000000000002000000000000000100000000000000";
    EXPECT_STREQ(key.ToString().GetString(), hashBuf);
    EXPECT_EQ(key.len, 1029);

    // Nothing has been cached for this key or for an arbitrary numeric hash.
    auto cache = GetOpExecCache(key);
    EXPECT_EQ(cache, nullptr);
    auto cache1 = GetOpExecCache(11);
    EXPECT_EQ(cache1, nullptr);

    // Initialized so the out-parameter never exposes an indeterminate value.
    uint64_t workspaceSize = 0;
    auto executor = PTAFindExecCache(key.buf, key.len, &workspaceSize);
    EXPECT_EQ(executor, nullptr);
    InitPTACacheThreadLocal();
}
// Fills an RtsArg from one input and one output tensor, dumps it into a new OpExecCache,
// replays it through RunFromCache and registers the cache with AddOpExecCache.
TEST_F(OpCacheUt, RtsArgCacheTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArray below is never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    auto input_arg = OP_INPUT(&inputTensor);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // Prepare a zeroed tiling-data region inside the launch-argument buffer.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    constexpr size_t TEST_TILING_DATA_LEN = 32;
    tilingData->data_ = buffer.GetTilingDataAddr();
    memset_s(tilingData->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TEST_TILING_DATA_LEN);
    tilingData->data_size_ = TEST_TILING_DATA_LEN;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo argInfo(false, false, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();
    op::internal::PrintRtArg(arg.GetRtsArg());

    // Select the byte-string cache key ("hello", 5 bytes) instead of a numeric hash.
    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;

    auto opExecCache = new OpExecCache();
    opExecCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(opExecCache);
    auto cacheCtx = arg.DumpToCache();
    EXPECT_NE(cacheCtx, nullptr);

    aclrtStream stream = 0;
    op::internal::LaunchArgCache::RunFromCache(stream, GetCacheBuf());

    auto opExecCacheWrap = CreateCacheWrap(opExecCache);
    auto wrap = GetOpExecCacheFromExecutor(reinterpret_cast<aclOpExecutor*>(opExecCacheWrap));
    EXPECT_NE(wrap, nullptr);
    bool succ = AddOpExecCache(opExecCache);
    EXPECT_EQ(succ, true);

    // NOTE(review): opExecCache itself is not deleted here; presumably AddOpExecCache takes
    // ownership on success - confirm.
    delete opExecCacheWrap;
    op::DestroyOpArgContext(ctx);
    InitPTACacheThreadLocal();
}
// Marks an OpExecCache as in use before dumping launch arguments into it, and expects
// DumpToCache to return nullptr in that state.
TEST_F(OpCacheUt, RtsArgCacheRepeatSetTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArray below is never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    auto input_arg = OP_INPUT(&inputTensor);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // Prepare a zeroed tiling-data region inside the launch-argument buffer.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    constexpr size_t TEST_TILING_DATA_LEN = 60;
    tilingData->data_ = buffer.GetTilingDataAddr();
    memset_s(tilingData->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TEST_TILING_DATA_LEN);
    tilingData->data_size_ = TEST_TILING_DATA_LEN;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo argInfo(false, false, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();
    op::internal::PrintRtArg(arg.GetRtsArg());

    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;

    auto opExecCache = new OpExecCache();
    EXPECT_EQ(opExecCache->IsOpCacheValid(), true);
    opExecCache->SetUse();
    EXPECT_EQ(opExecCache->CanUse(), true);
    GetOpCacheContext().SetOpCache(opExecCache);

    // With the cache already marked as used, dumping yields no cache context.
    auto cacheCtx = arg.DumpToCache();
    EXPECT_EQ(cacheCtx, nullptr);

    // The test expects the cache to still report valid after MarkOpCacheInvalid().
    opExecCache->MarkOpCacheInvalid();
    EXPECT_EQ(opExecCache->IsOpCacheValid(), true);

    op::DestroyOpArgContext(ctx);
    delete opExecCache;
}
// Sets a cache buffer on a repeatable executor's OpExecCache, marks the cache as used, and checks
// that UpdateTensorAddr and FinalizeCache leave the buffer pointer untouched.
TEST_F(OpCacheUt, ExecutorCacheRepeatSetTest)
{
    // Static storage: the thread-local context keeps pointing at the key after this test returns.
    static const char HASH_KEY[] = "ExecutorCacheRepeatSetTest";

    bool oriOption = GetThreadLocalContext().cacheHasFull_;
    GetThreadLocalContext().cacheHasFull_ = false;
    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = reinterpret_cast<uint8_t*>(const_cast<char*>(HASH_KEY));
    // Key length without the terminating NUL, consistent with the other tests in this file.
    GetThreadLocalContext().cacheHashKeyLen_ = sizeof(HASH_KEY) - 1;

    auto uniqueExecutor = CREATE_EXECUTOR();
    aclOpExecutor* executor = uniqueExecutor.get();
    executor->SetRepeatable();
    EXPECT_EQ(executor->IsRepeatable(), true);

    auto opExecCache = executor->GetOpExecCache();
    // Dummy allocation that only serves as a recognizable buffer address.
    // Declared after uniqueExecutor, so it is released before the executor is destroyed.
    auto dummyBuf = std::make_unique<int>();
    void* ptr = reinterpret_cast<void*>(dummyBuf.get());
    opExecCache->SetCacheBuf(ptr);
    EXPECT_EQ(opExecCache->cacheBuf_, ptr);

    opExecCache->SetUse();
    EXPECT_EQ(opExecCache->CanUse(), true);

    executor->UpdateTensorAddr(nullptr, 0);
    EXPECT_EQ(opExecCache->cacheBuf_, ptr);
    executor->FinalizeCache();
    EXPECT_EQ(opExecCache->cacheBuf_, ptr);

    GetThreadLocalContext().cacheHasFull_ = oriOption;
}
// Same flow as the plain RtsArg cache test, but with a zero-length input array and a {0} output
// shape; DumpToCache is still expected to return a cache context.
TEST_F(OpCacheUt, RtsArgCacheTestHostDataNullptr)
{
    constexpr size_t TILING_LEN = 60;

    op::Shape emptyShape{0};
    int64_t inputData[8];
    aclIntArray emptyArray(inputData, 0);
    aclTensor outTensor(emptyShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inTensor(&emptyArray, op::DataType::DT_INT32);
    auto inArg = OP_INPUT(&inTensor);
    auto outArg = OP_OUTPUT(&outTensor);
    auto argCtx = op::MakeOpArgContext(inArg, outArg);

    // Zeroed tiling-data region inside the launch-argument buffer.
    op::internal::ExpandableRtsArgBuffer argBuffer;
    argBuffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tiling = argBuffer.GetTilingDataPtr();
    tiling->data_ = argBuffer.GetTilingDataAddr();
    memset_s(tiling->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TILING_LEN);
    tiling->data_size_ = TILING_LEN;
    tiling->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo launchInfo(false, false, argCtx);
    op::internal::RtsArg rtsArg(true, launchInfo, &argBuffer);
    rtsArg.FillArgs();
    op::internal::PrintRtArg(rtsArg.GetRtsArg());

    auto& tlsCtx = GetThreadLocalContext();
    tlsCtx.hashKey_ = 0;
    tlsCtx.cacheHashKey_ = (uint8_t*)"hello";
    tlsCtx.cacheHashKeyLen_ = 5;

    auto* execCache = new OpExecCache();
    execCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(execCache);
    auto dumped = rtsArg.DumpToCache();
    EXPECT_NE(dumped, nullptr);

    op::DestroyOpArgContext(argCtx);
    InitPTACacheThreadLocal();
    delete execCache;
}
// Dumps launch arguments into a cache, stores a KernelLaunchConfig via SetRunParam and checks every
// field was copied. Then verifies RunFromCache fails with ACLNN_ERR_INNER for a null cache buffer
// and succeeds for the real one.
TEST_F(OpCacheUt, AbnormalRtsArgCacheTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArray below is never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    auto input_arg = OP_INPUT(&inputTensor);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // Prepare a zeroed tiling-data region inside the launch-argument buffer.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    constexpr size_t TEST_TILING_DATA_LEN = 60;
    tilingData->data_ = buffer.GetTilingDataAddr();
    memset_s(tilingData->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TEST_TILING_DATA_LEN);
    tilingData->data_size_ = TEST_TILING_DATA_LEN;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo argInfo(false, false, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();

    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;

    auto opExecCache = new OpExecCache();
    opExecCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(opExecCache);
    auto cacheCtx = arg.DumpToCache();
    EXPECT_NE(cacheCtx, nullptr);

    // Arbitrary, recognizable values: only the copy into the cache context is checked.
    op::internal::KernelLaunchConfig launchCfg;
    launchCfg.binHandle = (void*)0x12341234;
    launchCfg.funcHandle = (void*)0x12345678;
    launchCfg.tilingKey = 1234;
    launchCfg.numBlocks = 32;
    launchCfg.schemMode = 1;
    launchCfg.dynUBufSize = 0;
    launchCfg.blockDimOffset = 0;
    launchCfg.engineType = op::internal::LaunchKernelEngineType::VECTOR_CORE_ENGINE_AIC;
    launchCfg.isFatBin = false;
    std::string kernelName = "abs";
    cacheCtx->SetRunParam(launchCfg, kernelName);

    EXPECT_EQ(cacheCtx->launchCfg_.binHandle, launchCfg.binHandle);
    EXPECT_EQ(cacheCtx->launchCfg_.funcHandle, launchCfg.funcHandle);
    EXPECT_EQ(cacheCtx->launchCfg_.tilingKey, launchCfg.tilingKey);
    EXPECT_EQ(cacheCtx->launchCfg_.numBlocks, launchCfg.numBlocks);
    EXPECT_EQ(cacheCtx->launchCfg_.schemMode, launchCfg.schemMode);
    EXPECT_EQ(cacheCtx->launchCfg_.dynUBufSize, launchCfg.dynUBufSize);
    EXPECT_EQ(cacheCtx->launchCfg_.blockDimOffset, launchCfg.blockDimOffset);
    EXPECT_EQ(cacheCtx->launchCfg_.engineType, launchCfg.engineType);
    EXPECT_EQ(cacheCtx->launchCfg_.isFatBin, launchCfg.isFatBin);
    EXPECT_STREQ(cacheCtx->launchCfg_.kernelNameOfNoFatBin, kernelName.c_str());

    aclrtStream stream = 0;
    // Abnormal path: a null cache buffer is rejected.
    auto res = op::internal::LaunchArgCache::RunFromCache(stream, nullptr);
    EXPECT_EQ(res, ACLNN_ERR_INNER);
    // Normal path: the buffer that was just filled can be replayed.
    res = op::internal::LaunchArgCache::RunFromCache(stream, GetCacheBuf());
    EXPECT_EQ(res, ACLNN_SUCCESS);

    op::DestroyOpArgContext(ctx);
    InitPTACacheThreadLocal();
    delete opExecCache;
}
// With ACLNN_CACHE_LIMIT set to "1", adds two caches under different 6-byte keys one after the other;
// each AddOpExecCache call is expected to succeed.
TEST_F(OpCacheUt, CacheShrink)
{
    setenv("ACLNN_CACHE_LIMIT", "1", 1);

    // One full round: select the key, create and register a cache, clear it, reset the thread-local state.
    const auto registerCacheUnderKey = [](uint8_t* hashKeyBytes) {
        GetThreadLocalContext().hashKey_ = 0;
        GetThreadLocalContext().cacheHashKey_ = hashKeyBytes;
        GetThreadLocalContext().cacheHashKeyLen_ = 6;

        auto execCache = new OpExecCache();
        execCache->SetCacheBuf(GetCacheBuf());
        GetOpCacheContext().SetOpCache(execCache);

        aclrtStream launchStream = 0;
        op::internal::LaunchArgCache::RunFromCache(launchStream, GetCacheBuf());

        auto cacheWrap = CreateCacheWrap(execCache);
        auto fromExecutor = GetOpExecCacheFromExecutor(reinterpret_cast<aclOpExecutor*>(cacheWrap));
        EXPECT_NE(fromExecutor, nullptr);

        bool added = AddOpExecCache(execCache);
        EXPECT_EQ(added, true);

        execCache->OldCacheClear();
        delete cacheWrap;
        InitPTACacheThreadLocal();
    };

    registerCacheUnderKey((uint8_t*)"hello1");
    registerCacheUnderKey((uint8_t*)"hello2");

    unsetenv("ACLNN_CACHE_LIMIT");
}
// Smoke test: runs CommonOpExecutorRun on a cache wrap that is passed in as an executor pointer.
// No result is asserted.
TEST_F(OpCacheUt, CacheUseTest)
{
    OpExecCache cache;
    auto cacheWrap = CreateCacheWrap(&cache);
    aclOpExecutor* asExecutor = reinterpret_cast<aclOpExecutor*>(cacheWrap);
    aclrtStream runStream = 0;
    CommonOpExecutorRun(nullptr, 0, asExecutor, runStream);
}
// Smoke test: CanUsePTACache must be callable for an API name.
// The result is not asserted, so it is discarded explicitly instead of being kept in an unused local.
TEST_F(OpCacheUt, UsePTACache)
{
    std::string api = "aclnnAdd";
    static_cast<void>(CanUsePTACache(api.c_str()));
}
// Exercises the PTA thread-local cache setters: the cached tensor address list, the numeric hash key
// and the byte-string hash key, and checks that SetOpCacheKey picks up the byte-string key.
TEST_F(OpCacheUt, SetPTACache)
{
    InitPTACacheThreadLocal();
    op::internal::GetThreadLocalContext().cachedTensorList_.clear();

    constexpr size_t ARR_SIZE = 6;
    int arr[ARR_SIZE] = {0};
    // size_t index avoids a signed/unsigned comparison against ARR_SIZE.
    for (size_t i = 0; i < ARR_SIZE; i++) {
        AddTensorAddrToCachedList(&arr[i]);
    }
    EXPECT_EQ(op::internal::GetThreadLocalContext().cachedTensorListSize_, ARR_SIZE);
    EXPECT_EQ(op::internal::GetThreadLocalContext().cachedTensorList_.size(), ARR_SIZE);

    // After re-initialization the logical size restarts, while the container keeps its previous size.
    InitPTACacheThreadLocal();
    int arr2[ARR_SIZE] = {0};
    AddTensorAddrToCachedList(&arr2[0]);
    AddTensorAddrToCachedList(&arr2[1]);
    EXPECT_EQ(op::internal::GetThreadLocalContext().cachedTensorListSize_, 2);
    EXPECT_EQ(op::internal::GetThreadLocalContext().cachedTensorList_.size(), ARR_SIZE);

    uint64_t hash = 0x12345678;
    SetPTAHashKey(hash);
    EXPECT_EQ(op::internal::GetThreadLocalContext().hashKey_, hash);

    // Declared const: a string literal must not bind to char*.
    // The const is cast away only because SetPTACacheHashKey takes a non-const uint8_t*.
    const char* hashKey = "aclnnMatmul12345678";
    SetPTACacheHashKey(reinterpret_cast<uint8_t*>(const_cast<char*>(hashKey)), strlen(hashKey));
    EXPECT_STREQ(reinterpret_cast<const char*>(op::internal::GetThreadLocalContext().cacheHashKey_), hashKey);
    EXPECT_EQ(op::internal::GetThreadLocalContext().cacheHashKeyLen_, strlen(hashKey));

    OpCacheKey key;
    SetOpCacheKey(key);
    EXPECT_STREQ(reinterpret_cast<char*>(key.buf), hashKey);
    EXPECT_EQ(key.len, strlen(hashKey));
    UnInitPTACacheThreadLocal();
}
// Dumps launch arguments whose input is a two-element tensor list into a cache and replays them
// with RunFromCache.
TEST_F(OpCacheUt, PtrListCacheTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArrays below are never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclIntArray self2(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    aclTensor inputTensor2(&self2, op::DataType::DT_INT32);
    const aclTensor* inputArr[] = {&inputTensor, &inputTensor2};
    aclTensorList* inputTensors = aclCreateTensorList(inputArr, 2);
    auto input_arg = OP_INPUT(inputTensors);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // Prepare a zeroed tiling-data region inside the launch-argument buffer.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    constexpr size_t TEST_TILING_DATA_LEN = 60;
    tilingData->data_ = buffer.GetTilingDataAddr();
    memset_s(tilingData->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TEST_TILING_DATA_LEN);
    tilingData->data_size_ = TEST_TILING_DATA_LEN;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    // Unlike the other RtsArg tests, the second LaunchArgInfo flag is true here.
    op::internal::LaunchArgInfo argInfo(false, true, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();

    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;

    auto opExecCache = new OpExecCache();
    opExecCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(opExecCache);
    auto cacheCtx = arg.DumpToCache();
    EXPECT_NE(cacheCtx, nullptr);

    aclrtStream stream = 0;
    op::internal::LaunchArgCache::RunFromCache(stream, GetCacheBuf());

    // The list is released with plain delete; the tensors it references live on this stack frame.
    delete inputTensors;
    delete opExecCache;
    op::DestroyOpArgContext(ctx);
}
// Uses tiling data of size 0 and expects DumpToCache to return nullptr; after Finalize() and
// SetUse() the cache is expected to report CanUse() == false.
TEST_F(OpCacheUt, StaticRtsArgCacheTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArray below is never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    auto input_arg = OP_INPUT(&inputTensor);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // No tiling payload: only size and capacity are set.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    tilingData->data_size_ = 0;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo argInfo(false, false, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();
    op::internal::PrintRtArg(arg.GetRtsArg());

    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;

    auto opExecCache = new OpExecCache();
    opExecCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(opExecCache);
    auto cacheCtx = arg.DumpToCache();
    EXPECT_EQ(cacheCtx, nullptr);

    opExecCache->Finalize();
    opExecCache->SetUse();
    EXPECT_EQ(opExecCache->CanUse(), false);

    // Cleanup order is kept exactly as in the original test.
    ResetCacheThreadLocal();
    delete opExecCache;
    InitPTACacheThreadLocal();
    GetOpCacheContext().SetOpCache(nullptr);
    op::DestroyOpArgContext(ctx);
}
// Smoke test: PrepareExceptionDumpInfo must accept one input and one output tensor.
// No result is asserted.
TEST_F(OpCacheUt, CacheExceptionDumpTest)
{
    op::Shape outShape{100};
    // Zero-initialized so the aclIntArray below is never built from indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    op::Tensor* inputOpTensor = inputTensor.GetTensor();
    op::Tensor* outputOpTensor = out.GetTensor();

    OpLogInfo opLogInfo;
    ExceptionDumpInfo exceptionDumpInfo;
    // Non-null placeholder stream value.
    aclrtStream stream = (aclrtStream)0x1;
    PrepareExceptionDumpInfo({inputOpTensor}, {outputOpTensor}, opLogInfo, exceptionDumpInfo, stream);
}
// End-to-end cache test for the Sort kernel.
// Part 1 (l2ApiName "aclnnArgsort"): builds the key, checks it against a fixed hex dump, launches
// the kernel through an executor and expects the executor's cache to be retrievable by key.
// Part 2 (l2ApiName "aclnnArgsortInplace"): repeats the flow and expects a distinct cache entry.
TEST_F(OpCacheUt, CreateHashGetCacheSuccessTest1)
{
    GetThreadLocalContext().cacheHasFull_ = false;
    bool usePTAHash = GetThreadLocalContext().usePTAHash_;
    GetThreadLocalContext().usePTAHash_ = false;
    setenv("ACLNN_CACHE_LIMIT", "10000000", 1);

    op::Shape selfShape{33, 15, 14, 48};
    op::Shape otherShape{33, 15, 14, 48};
    op::Shape outShape{33, 15, 14, 48};
    op::Shape idxShape{33, 15, 14, 48};
    auto self = std::make_unique<aclTensor>(selfShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
    auto out = std::make_unique<aclTensor>(outShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
    auto idx = std::make_unique<aclTensor>(idxShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);
    const aclTensor* wsArr[] = {out.get(), idx.get()};

    SortOpTypeId();
    uint32_t opType = op::OpTypeDict::ToOpType("Sort");
    int64_t dim = 0;
    bool descending = true;
    std::string apiName = "aclnnArgsort";
    auto input = OP_INPUT(self.get());
    auto output = OP_OUTPUT(out.get(), idx.get());
    auto attr = OP_ATTR(dim, descending);
    auto in0 = std::make_tuple(self.get(), dim, descending);
    auto out0 = std::make_tuple(out.get(), idx.get());
    aclrtStream stream = 0;
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnArgsort";
    op::internal::GetThreadLocalContext().logInfo_.l2SequenceCounter = OpGetLogSequence();
    GetThreadLocalContext().logInfo_.l0Name = "Sort";

    // ---- Part 1: key construction and first launch ----
    OpCacheKey key;
    AddParamToBuf(apiName);
    CalculateHashKey(in0);
    CalculateHashKey(out0);
    SetOpCacheKey(key);

    // Expected hex rendering of the key buffer. Declared const: a string literal must not bind to char*.
    const char* hashBuf =
        "0x61636c6e6e417267736f727421000000000000000f000000000000000e0000000000000030000000000000002c6027000000000000a0"
        "02000000000000300000000000000001000000000000002c21000000000000000f000000000000000e0000000000000030000000000000"
        "002c010000002c00000000000000002c0200000000000000000000000121000000000000000f000000000000000e000000000000003000"
        "0000000000002c6027000000000000a002000000000000300000000000000001000000000000002c21000000000000000f000000000000"
        "000e0000000000000030000000000000002c010000002c00000000000000002c0200000021000000000000000f000000000000000e0000"
        "000000000030000000000000002c6027000000000000a002000000000000300000000000000001000000000000002c2100000000000000"
        "0f000000000000000e0000000000000030000000000000002c030000002c00000000000000002c02000000000000000000000001000000"
        "000000000200000000000000";
    EXPECT_STREQ(key.ToString().GetString(), hashBuf);
    EXPECT_EQ(key.len, 396);
    auto cache = GetOpExecCache(key);
    EXPECT_EQ(cache, nullptr);

    auto uniqueExecutor = CREATE_EXECUTOR();
    aclOpExecutor* executor = uniqueExecutor.get();
    EXPECT_NE(executor->GetOpExecCache(), nullptr);
    thread_local uint64_t kernelLaunchIdDefinedInL0Dfx = GenKernelLauncherId("Sort");
    ProfilingInfoId profilingInfoId(0, kernelLaunchIdDefinedInL0Dfx, 0);
    op::internal::GetLauncherCtx().ClearTilingCache();

    // Query the workspace with a temporary arg context, then build the context used for the launch.
    aclTensorList* workspace = nullptr;
    auto ctx2 = op::MakeOpArgContext(input, output, attr);
    GetWorkspace(opType, &workspace, executor, *ctx2->GetOpArg(op::OpArgDef::OP_INPUT_ARG),
                 *ctx2->GetOpArg(op::OpArgDef::OP_OUTPUT_ARG), *ctx2->GetOpArg(op::OpArgDef::OP_ATTR_ARG));
    op::DestroyOpArgContext(ctx2);
    auto ctx = op::MakeOpArgContext(OP_WORKSPACE(workspace), OP_INPUT(self.get()), OP_OUTPUT(out.get(), idx.get()),
                                    OP_ATTR(dim, descending));
    auto* launcher = new op::AiCoreKernelLauncher{opType, op::AI_CORE, profilingInfoId, executor, ctx};
    launcher->SaveLaunchCtx(std::move(op::internal::GetLauncherCtx()));
    executor->AddToKernelLauncherList(launcher);
    uint64_t workspaceSize = uniqueExecutor->GetWorkspaceSize();

    // From here on the executor is managed through the raw pointer and deleted manually below.
    uniqueExecutor.ReleaseTo(&executor);
    auto* cachePtr = executor->GetOpExecCache();
    EXPECT_NE(cachePtr, nullptr);
    GetOpCacheContext().SetOpCache(cachePtr);
    executor->SetStream(stream);
    executor->UpdateTensorAddr(workspace, workspaceSize);
    auto rc = launcher->Launch();
    EXPECT_EQ(rc, ACL_SUCCESS);
    OpCacheKey opCacheKey = executor->GetOpExecCache()->GetOpCacheKey();
    delete executor;

    // The cache must outlive the executor and be retrievable by its key.
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnArgsort";
    auto cache1 = GetOpExecCache(opCacheKey);
    EXPECT_EQ(cache1, cachePtr);
    std::vector<void*> tensors;
    tensors.push_back(const_cast<void*>(reinterpret_cast<const void*>(self.get()->GetStorage())));
    op::internal::opProfilingSwitch.kernelLaunchFlag = true;
    op::internal::opProfilingSwitch.additionInfoFlag = true;
    cache1->Run(nullptr, stream, tensors);

    // ---- Part 2: same parameters under a different l2ApiName ----
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnArgsortInplace";
    auto uniqueExecutorInplace = CREATE_EXECUTOR();
    OpCacheKey keyInplace;
    AddParamToBuf(apiName);
    CalculateHashKey(in0);
    CalculateHashKey(out0);
    SetOpCacheKey(keyInplace);
    EXPECT_NE(keyInplace.buf, nullptr);
    EXPECT_NE(keyInplace.len, 0);
    auto cacheInplace = GetOpExecCache(keyInplace);
    EXPECT_EQ(cacheInplace, nullptr);

    aclOpExecutor* executorInplace = uniqueExecutorInplace.get();
    EXPECT_NE(executorInplace->GetOpExecCache(), nullptr);
    ProfilingInfoId profilingInfoIdInplace(0, kernelLaunchIdDefinedInL0Dfx, 0);
    op::internal::GetLauncherCtx().ClearTilingCache();
    aclTensorList* workspaceInplace = nullptr;
    auto ctx3 = op::MakeOpArgContext(input, output, attr);
    GetWorkspace(opType, &workspaceInplace, executorInplace, *ctx3->GetOpArg(op::OpArgDef::OP_INPUT_ARG),
                 *ctx3->GetOpArg(op::OpArgDef::OP_OUTPUT_ARG), *ctx3->GetOpArg(op::OpArgDef::OP_ATTR_ARG));
    op::DestroyOpArgContext(ctx3);
    auto ctx4 = op::MakeOpArgContext(OP_WORKSPACE(workspaceInplace), OP_INPUT(self.get()),
                                     OP_OUTPUT(out.get(), idx.get()), OP_ATTR(dim, descending));
    auto* launcherInplace = new op::AiCoreKernelLauncher{opType, op::AI_CORE, profilingInfoIdInplace, executorInplace,
                                                         ctx4};
    launcherInplace->SaveLaunchCtx(std::move(op::internal::GetLauncherCtx()));
    executorInplace->AddToKernelLauncherList(launcherInplace);
    aclrtStream streamInplace = 0;
    uint64_t workspaceSizeInplace = uniqueExecutorInplace->GetWorkspaceSize();
    uniqueExecutorInplace.ReleaseTo(&executorInplace);
    EXPECT_NE(executorInplace->GetOpExecCache(), nullptr);
    OpCacheKey opCacheKeyInplace = executorInplace->GetOpExecCache()->GetOpCacheKey();
    GetOpCacheContext().SetOpCache(executorInplace->GetOpExecCache());
    executorInplace->SetStream(streamInplace);
    executorInplace->UpdateTensorAddr(workspaceInplace, workspaceSizeInplace);
    auto rcInplace = launcherInplace->Launch();
    EXPECT_EQ(rcInplace, ACL_SUCCESS);
    delete executorInplace;

    // The second cache exists and is a different entry from the first one.
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnArgsortInplace";
    auto cacheInplace1 = GetOpExecCache(opCacheKeyInplace);
    EXPECT_NE(cacheInplace1, nullptr);
    EXPECT_NE(cacheInplace1, cache1);

    // Restore the thread-local state touched above.
    GetThreadLocalContext().usePTAHash_ = usePTAHash;
    GetOpCacheContext().SetOpCache(nullptr);
    // NOTE(review): the environment variable is left set to "1" rather than unset - confirm this is intended.
    setenv("ACLNN_CACHE_LIMIT", "1", 1);
}
// Hashes five references to a 20-entry tensor list (19 tensors plus a trailing null) together with
// a few more parameters, and expects SetOpCacheKey to yield an empty key (buf == nullptr, len == 0).
// All cache lookups for that key are expected to return nullptr.
TEST_F(OpCacheUt, CreateHashAbnormalBufferOverflow)
{
    bool usePTAHash = GetThreadLocalContext().usePTAHash_;
    GetThreadLocalContext().usePTAHash_ = false;

    op::Shape tShape{1, 2, 3};
    constexpr size_t TENSOR_NUM = 19;
    // Owns the 19 identical tensors; declared before tensorList so it outlives the list.
    std::array<std::unique_ptr<aclTensor>, TENSOR_NUM> tensors;
    // Value-initialized: the extra last slot stays nullptr, giving the list its trailing null entry.
    std::array<aclTensor*, TENSOR_NUM + 1> listTensor{};
    for (size_t i = 0; i < TENSOR_NUM; i++) {
        tensors[i] = std::make_unique<aclTensor>(tShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
        listTensor[i] = tensors[i].get();
    }
    aclTensorList tensorList(listTensor.data(), listTensor.size());
    const aclTensorList* tensorListPtr = &tensorList;
    aclTensorList* tensorList2Ptr = &tensorList;
    aclTensorList* tensorList3Ptr = &tensorList;
    aclTensorList* tensorList4Ptr = &tensorList;
    aclTensorList* tensorList5Ptr = &tensorList;

    int64_t intValue[] = {1, 2, 3};
    aclIntArray array3(intValue, 3);
    const aclIntArray* array3Ptr = &array3;
    float fpValue = 3.2;
    aclScalar scalar(&fpValue, op::DataType::DT_FLOAT);
    const aclScalar* scalarPtr = &scalar;
    std::string apiName = "aclnnClamp";
    const aclDataType dataType = aclDataType::ACL_FLOAT;
    const aclFp16Array* arrayNull4 = nullptr;
    const aclTensorList* tensorListNull1 = nullptr;
    aclTensorList* tensorListNull2 = nullptr;
    int dim = 0;
    bool keepdim = true;

    auto in0 = std::make_tuple(tensorListPtr, tensorList2Ptr, tensorList3Ptr, tensorList4Ptr, tensorList5Ptr, dim,
                               keepdim);
    auto out0 = std::make_tuple(array3Ptr, scalarPtr, dataType, arrayNull4, tensorListNull1, tensorListNull2);

    OpCacheKey key;
    AddParamToBuf(apiName);
    CalculateHashKey(in0);
    CalculateHashKey(out0);
    AddSeperator();
    SetOpCacheKey(key);

    // The key is expected to be empty for this parameter set.
    EXPECT_EQ(key.buf, nullptr);
    EXPECT_EQ(key.len, 0);
    auto cache = GetOpExecCache(key);
    EXPECT_EQ(cache, nullptr);

    // Initialized so the out-parameter never exposes an indeterminate value.
    uint64_t workspaceSize = 0;
    auto executor = PTAFindExecCache(key.buf, key.len, &workspaceSize);
    EXPECT_EQ(executor, nullptr);
    uint64_t hash = 11;
    executor = PTAGetExecCache(hash, &workspaceSize);
    EXPECT_EQ(executor, nullptr);

    InitPTACacheThreadLocal();
    GetThreadLocalContext().usePTAHash_ = usePTAHash;
}
// Launches Gelu with an additional OP_OUTSHAPE argument and checks that the executor still
// owns an OpExecCache afterwards, while the key copied from that cache before the launch is empty.
TEST_F(OpCacheUt, AbnormalIncludeOpOutShape)
{
    // Run with usePTAHash_ switched off; the previous value is restored before leaving the test.
    bool usePTAHash = GetThreadLocalContext().usePTAHash_;
    GetThreadLocalContext().usePTAHash_ = false;

    op::Shape tShape{1, 2, 3};
    auto self = std::make_unique<aclTensor>(tShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
    auto out = std::make_unique<aclTensor>(tShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
    auto outShape = std::make_unique<aclTensor>(tShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);

    // NOTE(review): attr/input/output/OpTypeId are not read again in this test. They are kept
    // because the macros/calls producing them may have side effects that are not visible here.
    int64_t dim = 0;
    bool descending = true;
    auto attr = OP_ATTR(dim, descending);
    auto input = OP_INPUT(self.get());
    auto output = OP_OUTPUT(out.get());
    auto in0 = std::make_tuple(self.get());
    auto out0 = std::make_tuple(out.get());
    std::string apiName = "aclnnGelu";
    uint32_t OpTypeId = op::OpTypeDict::ToOpType("Gelu");
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnGelu";
    op::internal::GetThreadLocalContext().logInfo_.l2SequenceCounter = OpGetLogSequence();
    auto uniqueExecutor = CREATE_EXECUTOR();

    // Build a cache key from the api name and the in/out tuples; no entry is expected for it yet.
    OpCacheKey key;
    AddParamToBuf(apiName);
    CalculateHashKey(in0);
    CalculateHashKey(out0);
    SetOpCacheKey(key);
    EXPECT_NE(key.buf, nullptr);
    EXPECT_NE(key.len, 0);
    auto cache = GetOpExecCache(key);
    EXPECT_EQ(cache, nullptr);

    aclOpExecutor* executor = uniqueExecutor.get();
    // Check the cache pointer BEFORE dereferencing it. On failure, restore the thread-local
    // flag and stop here; uniqueExecutor still owns the executor and is destroyed normally.
    if (executor->GetOpExecCache() == nullptr) {
        GetThreadLocalContext().usePTAHash_ = usePTAHash;
        FAIL() << "executor has no OpExecCache";
    }
    OpCacheKey opCacheKey = executor->GetOpExecCache()->GetOpCacheKey();

    op::internal::GetLauncherCtx().ClearTilingCache();
    ADD_TO_LAUNCHER_LIST_AICORE(Gelu, OP_INPUT(self.get()), OP_OUTPUT(out.get()), OP_OUTSHAPE(outShape.get(), 0));
    EXPECT_NE(executor->GetOpExecCache(), nullptr);
    EXPECT_EQ(opCacheKey.buf, nullptr);
    EXPECT_EQ(opCacheKey.len, 0);

    // Take the executor out of uniqueExecutor and free it, together with its cache, by hand.
    uniqueExecutor.ReleaseTo(&executor);
    GetThreadLocalContext().usePTAHash_ = usePTAHash;
    delete executor->GetOpExecCache();
    delete executor;
}
// Calls op::internal::GetFromCache for "aclnnGelu" with a freshly created executor and
// expects it to return false.
TEST_F(OpCacheUt, GetFromCache)
{
    // Run with usePTAHash_ switched off; the previous value is restored below.
    bool usePTAHash = GetThreadLocalContext().usePTAHash_;
    GetThreadLocalContext().usePTAHash_ = false;

    op::Shape tShape{1, 2, 3};
    auto self = std::make_unique<aclTensor>(tShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
    auto out = std::make_unique<aclTensor>(tShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
    auto in0 = std::make_tuple(self.get());
    auto out0 = std::make_tuple(out.get());
    std::string apiName = "aclnnGelu";
    uint32_t OpTypeId = op::OpTypeDict::ToOpType("Gelu");
    op::internal::GetThreadLocalContext().logInfo_.l2ApiName = "aclnnGelu";
    op::internal::GetThreadLocalContext().logInfo_.l2SequenceCounter = OpGetLogSequence();
    auto uniqueExecutor = CREATE_EXECUTOR();
    aclOpExecutor* executor = uniqueExecutor.get();

    // Give the out-parameter real storage: passing an uninitialized pointer would let the
    // callee write through an indeterminate address.
    uint64_t workspaceSize = 0;
    auto ret = op::internal::GetFromCache(&executor, &workspaceSize, "aclnnGelu", in0, out0);
    EXPECT_EQ(ret, false);

    GetThreadLocalContext().usePTAHash_ = usePTAHash;
    // Take the executor out of uniqueExecutor and free it by hand.
    uniqueExecutor.ReleaseTo(&executor);
    delete executor;
}
// Adds a tensor constructed from a pointer to a local float to an OpExecCache and checks
// that AddLaunchData(65537) returns nullptr.
TEST_F(OpCacheUt, AddLaunchTensor)
{
    float hostValue = 3.2;
    uint64_t elementCount = 1;
    auto tensor = std::make_unique<aclTensor>(&hostValue, elementCount, op::DataType::DT_FLOAT);

    // The tensor's storage is expected to report that it does not come from workspace.
    const aclStorage* tensorStorage = tensor->GetStorage();
    EXPECT_EQ(tensorStorage->IsFromWorkspace(), false);

    // The cache is owned by a unique_ptr and released at scope exit, before `tensor`.
    auto execCache = std::make_unique<OpExecCache>();
    execCache->SetCacheBuf(GetCacheBuf());
    execCache->AddLaunchTensor(tensor.get(), sizeof(void*));
    EXPECT_EQ(execCache->AddLaunchData(65537), nullptr);
}
// Smoke test: registers a tensor relation between two tensors on an OpExecCache and then
// finalizes the cache. There are no explicit expectations; the calls just have to complete.
TEST_F(OpCacheUt, AddTensorRelation)
{
    op::Shape shape{1, 2, 3};
    auto srcTensor = std::make_unique<aclTensor>(shape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
    auto dstTensor = std::make_unique<aclTensor>(shape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);

    // Declared after the tensors so it is destroyed before them at scope exit.
    auto execCache = std::make_unique<OpExecCache>();
    execCache->SetCacheBuf(GetCacheBuf());
    execCache->AddTensorRelation(srcTensor.get(), dstTensor.get());
    execCache->Finalize();
}
// Checks two simple accessors: a default-constructed OpExecCache reports a workspace size
// of 0, and op::internal::CheckCacheable() returns false at this point.
TEST_F(OpCacheUt, apiTest)
{
    auto execCache = std::make_unique<OpExecCache>();

    auto reportedWorkspace = execCache->GetWorkspaceSize();
    EXPECT_EQ(reportedWorkspace, 0);

    bool isCacheable = op::internal::CheckCacheable();
    EXPECT_EQ(isCacheable, false);
}
// Feeds an aclScalarList holding one double scalar into AddParamToBuf and expects the
// resulting cache key to be non-empty.
TEST_F(OpCacheUt, AddParamToBufaclScalarList)
{
    // Switch usePTAHash_ off for the duration of the test; restored at the end.
    auto& tlsCtx = GetThreadLocalContext();
    const bool savedUsePtaHash = tlsCtx.usePTAHash_;
    tlsCtx.usePTAHash_ = false;

    double rawValue = 5;
    auto* scalar = aclCreateScalar(&rawValue, aclDataType::ACL_DOUBLE);
    auto scalarList = aclCreateScalarList(&scalar, 1);
    AddParamToBuf(scalarList);

    OpCacheKey cacheKey;
    SetOpCacheKey(cacheKey);
    EXPECT_NE(cacheKey.buf, nullptr);
    EXPECT_NE(cacheKey.len, 0);

    aclDestroyScalarList(scalarList);
    tlsCtx.usePTAHash_ = savedUsePtaHash;
}
// Passes a mutable char*, a null const char* and a null char* to AddParamToBuf and expects
// the resulting cache key to be non-empty.
TEST_F(OpCacheUt, AddParamToBufChar)
{
    // Run with usePTAHash_ switched off; the previous value is restored below.
    bool usePTAHash = GetThreadLocalContext().usePTAHash_;
    GetThreadLocalContext().usePTAHash_ = false;

    // A string literal may not be bound to a non-const char* in C++11 and later, so the
    // mutable-pointer argument is backed by a writable array. The argument type stays char*.
    char apiNameBuf[] = "aclnnGelu";
    char* apiName = apiNameBuf;
    AddParamToBuf(apiName);

    const char* nullptrCharPtr = nullptr;
    AddParamToBuf(nullptrCharPtr);
    char* nullptrCharPtrWithoutConst = nullptr;
    AddParamToBuf(nullptrCharPtrWithoutConst);

    OpCacheKey key;
    SetOpCacheKey(key);
    EXPECT_NE(key.buf, nullptr);
    EXPECT_NE(key.len, 0);

    GetThreadLocalContext().usePTAHash_ = usePTAHash;
}
// Adds and removes OpExecCache entries under two thread-local key configurations (a byte
// key "hello1", then hashKey_ = 10 with no byte key) and finally calls RemoveExecCache(nullptr).
// ACLNN_CACHE_LIMIT is set to "100000" for the test and put back to its prior state afterwards.
TEST_F(OpCacheUt, OpExecCacheRemove)
{
    // Copy the current value now: the pointer returned by getenv may be invalidated by the
    // setenv call below, and it is null when the variable is not set at all.
    const char* const envCacheLimit = std::getenv("ACLNN_CACHE_LIMIT");
    const bool hadCacheLimit = (envCacheLimit != nullptr);
    const std::string savedCacheLimit = hadCacheLimit ? envCacheLimit : "";
    setenv("ACLNN_CACHE_LIMIT", "100000", 1);

    // Case 1: entry identified by a byte key.
    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello1";
    GetThreadLocalContext().cacheHashKeyLen_ = 6;
    auto opExecCache = new OpExecCache();
    bool succ = AddOpExecCache(opExecCache);
    opExecCache->OldCacheClear();
    EXPECT_EQ(succ, true);
    RemoveExecCache(opExecCache);
    delete opExecCache;

    // Case 2: entry identified by hashKey_ only.
    GetThreadLocalContext().hashKey_ = 10;
    GetThreadLocalContext().cacheHashKey_ = nullptr;
    GetThreadLocalContext().cacheHashKeyLen_ = 0;
    auto opExecCache1 = new OpExecCache();
    succ = AddOpExecCache(opExecCache1);
    opExecCache1->OldCacheClear();
    EXPECT_EQ(succ, true);
    RemoveExecCache(opExecCache1);
    delete opExecCache1;

    // Removing a null cache is called as well; no result is checked.
    RemoveExecCache(nullptr);

    // Restore the environment: re-set the saved value, or remove the variable if it was
    // not set before (setenv must not be given a null value).
    if (hadCacheLimit) {
        setenv("ACLNN_CACHE_LIMIT", savedCacheLimit.c_str(), 1);
    } else {
        unsetenv("ACLNN_CACHE_LIMIT");
    }
}
// Creates an executor while cacheHasFull_ is forced to false. The flag is restored before
// the executor goes out of scope. No explicit expectations.
TEST_F(OpCacheUt, OpExecCacheDeleteTest)
{
    auto& tlsCtx = GetThreadLocalContext();
    auto savedCacheHasFull = tlsCtx.cacheHasFull_;

    tlsCtx.cacheHasFull_ = false;
    auto uniqueExecutor = CREATE_EXECUTOR();
    tlsCtx.cacheHasFull_ = savedCacheHasFull;
}
// With ENABLE_1982=1 and a stubbed SoC version, fills RTS launch arguments, dumps them into
// an OpExecCache, attaches a launch configuration and expects RunFromCache to return
// ACLNN_SUCCESS.
TEST_F(OpCacheUt, CacheLaunch1982Test2)
{
    setenv("ENABLE_1982", "1", 1);
    PlatformInfoStub::GetInstance()->SetSoCVersion("Ascend910_93", "Ascend910_9391");

    op::Shape outShape{100};
    // Zero-initialized so that aclIntArray does not read indeterminate values.
    int64_t inputData[8] = {};
    aclIntArray self(inputData, 8);
    aclTensor out(outShape, op::DataType::DT_INT32, ge::FORMAT_ND, nullptr);
    aclTensor inputTensor(&self, op::DataType::DT_INT32);
    auto input_arg = OP_INPUT(&inputTensor);
    auto output_arg = OP_OUTPUT(&out);
    auto ctx = op::MakeOpArgContext(input_arg, output_arg);

    // Prepare the argument buffer and 100 bytes of zeroed tiling data inside it.
    op::internal::ExpandableRtsArgBuffer buffer;
    buffer.Init(TEST_LAUNCH_ARG_INIT_CAP, TEST_TILING_HOST_DATA_INIT_CAP);
    op::internal::TilingData* tilingData = buffer.GetTilingDataPtr();
    constexpr size_t TEST_TILING_DATA_LEN = 100;
    tilingData->data_ = buffer.GetTilingDataAddr();
    memset_s(tilingData->data_, TEST_TILING_HOST_DATA_INIT_CAP, 0, TEST_TILING_DATA_LEN);
    tilingData->data_size_ = TEST_TILING_DATA_LEN;
    tilingData->capacity_ = TEST_TILING_HOST_DATA_INIT_CAP;

    op::internal::LaunchArgInfo argInfo(false, false, ctx);
    op::internal::RtsArg arg(true, argInfo, &buffer);
    arg.FillArgs();

    // Select the thread-local cache key and install a fresh OpExecCache as the current cache.
    GetThreadLocalContext().hashKey_ = 0;
    GetThreadLocalContext().cacheHashKey_ = (uint8_t*)"hello";
    GetThreadLocalContext().cacheHashKeyLen_ = 5;
    auto opExecCache = new OpExecCache();
    opExecCache->SetCacheBuf(GetCacheBuf());
    GetOpCacheContext().SetOpCache(opExecCache);

    auto cacheCtx = arg.DumpToCache();
    EXPECT_NE(cacheCtx, nullptr);

    // Launch configuration; the function handle is an arbitrary non-null placeholder value.
    op::internal::KernelLaunchConfig launchCfg;
    launchCfg.funcHandle = (void*)0x12345678;
    launchCfg.numBlocks = 32;
    launchCfg.schemMode = 1;
    launchCfg.dynUBufSize = 0;
    launchCfg.blockDimOffset = 0;
    launchCfg.engineType = op::internal::LaunchKernelEngineType::NO_VECTOR_CORE;
    launchCfg.isFatBin = false;
    std::string kernelName = "abs";
    cacheCtx->SetRunParam(launchCfg, kernelName);

    aclrtStream stream = nullptr;
    auto res = op::internal::LaunchArgCache::RunFromCache(stream, GetCacheBuf());
    EXPECT_EQ(res, ACLNN_SUCCESS);

    // Tear down in the same order as set up: arg context, thread-local PTA cache state
    // (NOTE(review): presumably a reset - confirm against InitPTACacheThreadLocal), current
    // cache pointer, the cache itself, the platform stub and the environment variable.
    op::DestroyOpArgContext(ctx);
    InitPTACacheThreadLocal();
    GetOpCacheContext().SetOpCache(nullptr);
    delete opExecCache;
    PlatformInfoStub::GetInstance()->Reset();
    unsetenv("ENABLE_1982");
}
// Obtains the op-exec cache manager, expects it to be non-null, then releases it.
// ReleaseOpExecCacheManager is also called with nullptr; no result is checked for that call.
TEST_F(OpCacheUt, OpCacheManagerCount)
{
    void* cacheManager = op::internal::GetOpExecCacheManager();
    EXPECT_NE(cacheManager, nullptr);

    op::internal::ReleaseOpExecCacheManager(cacheManager);

    void* const noManager = nullptr;
    op::internal::ReleaseOpExecCacheManager(noManager);
}
// Hashes a 15-byte all-zero key with OpCacheKeyHash and compares the result to a fixed
// expected value.
// NOTE(review): 15 presumably exercises the hash's handling of a length that is not a
// multiple of its block size (per the test name) - confirm against the hash implementation.
TEST_F(OpCacheUt, MurmurHashRemain)
{
    const int len = 15;
    uint8_t buf[len];
    memset_s(buf, len, 0, len);

    op::internal::OpCacheKey key(buf, len);
    op::internal::OpCacheKeyHash hasher;
    std::size_t hash = hasher(key);

    // The value exceeds the range of long long, so the literal needs an unsigned suffix;
    // without it the unsuffixed decimal literal has no valid standard type.
    const std::size_t expectedHash = 9371257502052110381ULL;
    EXPECT_EQ(expectedHash, hash);
}
// Declaration of a flag that is defined in another translation unit; the tests below read it
// through load() to observe the effect of DisableOpCacheCount().
namespace op {
namespace internal {
extern std::atomic<bool> g_enableOpCacheCount;
}
}
// Thread body: after creating an executor the op-cache-count flag is expected to be true,
// and after DisableOpCacheCount() it is expected to be false.
static void OpCacheUseCountTestFunc()
{
    auto uniqueExecutor = CREATE_EXECUTOR();

    const bool enabledBefore = g_enableOpCacheCount.load();
    EXPECT_EQ(enabledBefore, true);

    DisableOpCacheCount();

    const bool enabledAfter = g_enableOpCacheCount.load();
    EXPECT_EQ(enabledAfter, false);
}
// Runs OpCacheUseCountTestFunc on a separate thread and waits for it to finish.
// NOTE(review): presumably a dedicated thread is used to get fresh thread-local state - confirm.
TEST_F(OpCacheUt, DisableOpCacheUseCountTest)
{
    std::thread worker(OpCacheUseCountTestFunc);
    worker.join();
}