* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "gtest/gtest.h"
#include <vector>
#include "opdev/op_executor.h"
#include "opdev/op_dfx.h"
#include "op_dfx_internal.h"
#include "opdev/data_type_utils.h"
#include "opdev/format_utils.h"
#include "thread_local_context.h"
#include "launcher_ctx.h"
#include "kernel_launcher.h"
#include "opdev/make_op_executor.h"
using namespace op;
using namespace std;
using namespace op::internal;
class OpExecutorTest : public testing::Test {};
TEST_F(OpExecutorTest, TestOpExecutor)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
bool boolValue[] = {true, false, true};
auto array1 = executor.AllocBoolArray(boolValue, 3);
EXPECT_NE(array1, nullptr);
float floatValue[] = {1, 2, 3};
auto array2 = executor.AllocFloatArray(floatValue, 3);
EXPECT_NE(array2, nullptr);
int64_t intValue[] = {1, 2, 3};
auto array3 = executor.AllocIntArray(intValue, 3);
EXPECT_NE(array3, nullptr);
float fValue = 3.2;
auto scalar1 = executor.AllocScalar(&fValue, op::DataType::DT_FLOAT);
EXPECT_NE(scalar1, nullptr);
auto scalar2 = executor.AllocScalar(float(1.2));
EXPECT_NE(scalar2, nullptr);
auto tensor1 = executor.AllocTensor({1, 2}, {1, 2}, DataType::DT_FLOAT, Format::FORMAT_ND, Format::FORMAT_ND);
EXPECT_NE(tensor1, nullptr);
auto tensor2 = executor.AllocTensor({1, 2}, DataType::DT_FLOAT);
EXPECT_NE(tensor2, nullptr);
auto tensor3 = executor.AllocTensor(DataType::DT_FLOAT, Format::FORMAT_ND, Format::FORMAT_ND);
EXPECT_NE(tensor3, nullptr);
auto tensor4 = executor.ConvertToTensor(scalar1, op::DataType::DT_FLOAT);
EXPECT_NE(tensor4, nullptr);
auto tensor5 = executor.ConvertToTensor(array1, DataType::DT_BOOL);
EXPECT_NE(tensor5, nullptr);
auto tensor6 = executor.ConvertToTensor(array2, DataType::DT_FLOAT);
EXPECT_NE(tensor6, nullptr);
auto tensor7 = executor.ConvertToTensor(array3, DataType::DT_INT64);
EXPECT_NE(tensor7, nullptr);
bfloat16 bf16Array[4] = {0, 1, 2, 3};
auto tensor8 = executor.ConvertToTensor(bf16Array, 4, DataType::DT_BF16);
EXPECT_NE(tensor8, nullptr);
aclBf16Array bfArray(bf16Array, 4);
auto tensor9 = executor.ConvertToTensor(&bfArray, DataType::DT_BF16);
EXPECT_NE(tensor9, nullptr);
auto tensor10 = executor.AllocHostTensor({100}, DataType::DT_DOUBLE, Format::FORMAT_ND);
EXPECT_NE(tensor10, nullptr);
auto tensor31 = executor.ConvertToTensor(scalar1, op::DataType::DT_INT4);
EXPECT_EQ(tensor31, nullptr);
uint64_t size = 1;
const int64_t int64Value = 3;
aclTensor* tensor21 = executor.AllocHostTensor(&int64Value, size, op::DataType::DT_INT64);
EXPECT_NE(tensor21, nullptr);
int32_t intValue1 = 3;
aclTensor* tensor22 = executor.AllocHostTensor(&intValue1, size, op::DataType::DT_INT32);
EXPECT_NE(tensor22, nullptr);
double doubleValue = 3.2;
aclTensor* tensor23 = executor.AllocHostTensor(&doubleValue, size, op::DataType::DT_DOUBLE);
EXPECT_NE(tensor23, nullptr);
const char* Value = "qqq";
aclTensor* tensor24 = executor.AllocHostTensor(Value, size, op::DataType::DT_STRING);
EXPECT_EQ(tensor24, nullptr);
const uint64_t uint64Value = 3;
aclTensor* tensor25 = executor.AllocHostTensor(&uint64Value, size, op::DataType::DT_UINT64);
EXPECT_NE(tensor25, nullptr);
int64_t data[100] = {1};
tensor10->SetIntData(data, 100, DataType::DT_INT64);
tensor10->SetIntData(data, 100, DataType::DT_UINT64);
tensor10->SetIntData(data, 100, DataType::DT_INT32);
tensor10->SetIntData(data, 100, DataType::DT_UINT32);
tensor10->SetIntData(data, 100, DataType::DT_INT16);
tensor10->SetIntData(data, 100, DataType::DT_UINT16);
tensor10->SetIntData(data, 100, DataType::DT_INT8);
tensor10->SetIntData(data, 100, DataType::DT_UINT8);
tensor10->SetIntData(data, 100, DataType::DT_DOUBLE);
tensor10->SetBoolData((bool*)data, 100, DataType::DT_BOOL);
tensor10->SetFloatData((float*)data, 100, DataType::DT_FLOAT);
tensor10->SetFp16Data((fp16_t*)data, 100, DataType::DT_FLOAT16);
tensor10->SetBf16Data((bfloat16*)data, 100, DataType::DT_BF16);
auto tensor11 = executor.AllocHostTensor({100}, {100}, DataType::DT_FLOAT, Format::FORMAT_ND, Format::FORMAT_ND);
EXPECT_NE(tensor11, nullptr);
vector<int64_t> vec12(100, 1);
auto tensor12 = executor.AllocHostTensor(vec12.data(), vec12.size(), DataType::DT_INT64);
EXPECT_NE(tensor12, nullptr);
bool vec13[100] = {true};
auto tensor13 = executor.AllocHostTensor(vec13, 100, DataType::DT_BOOL);
EXPECT_NE(tensor13, nullptr);
vector<float> vec14(100, 1.0);
auto tensor14 = executor.AllocHostTensor(vec14.data(), vec14.size(), DataType::DT_FLOAT);
EXPECT_NE(tensor14, nullptr);
vector<fp16_t> vec15(100, 1);
auto tensor15 = executor.AllocHostTensor(vec15.data(), vec15.size(), DataType::DT_FLOAT16);
EXPECT_NE(tensor15, nullptr);
vector<bfloat16> vec16(100, 1);
auto tensor16 = executor.AllocHostTensor(vec16.data(), vec16.size(), DataType::DT_BF16);
EXPECT_NE(tensor16, nullptr);
executor.UpdateTensorAddr(nullptr, 512);
tensor2->SetFromWorkspace(false);
tensor10->SetDataType(DataType::DT_UINT64);
EXPECT_EQ(tensor10->GetDataType(), DataType::DT_UINT64);
}
TEST_F(OpExecutorTest, TestOpExecutorWorkspace)
{
aclOpExecutor executor;
void* addr = executor.GetWorkspaceAddr();
EXPECT_EQ(addr, nullptr);
uint64_t size = executor.GetWorkspaceSize();
EXPECT_EQ(size, 0);
}
TEST_F(OpExecutorTest, NnopContiguousExecutorCheckOffsetSuccess)
{
aclOpExecutor executor;
uint64_t size = executor.GetLinearWorkspaceSize();
EXPECT_EQ(size, 0);
op::FVector<uint64_t> ws({1, 2, 3});
executor.SetWorkspaceOffsets(ws);
EXPECT_EQ(executor.GetWorkspaceOffsets().size(), 3);
EXPECT_EQ(executor.GetWorkspaceOffsets()[0], 1);
EXPECT_EQ(executor.GetWorkspaceOffsets()[1], 2);
EXPECT_EQ(executor.GetWorkspaceOffsets()[2], 3);
}
TEST_F(OpExecutorTest, CommonOpExecutorRunNullptr)
{
EXPECT_EQ(CommonOpExecutorRun(nullptr, 0, nullptr, nullptr), ACLNN_ERR_PARAM_NULLPTR);
}
TEST_F(OpExecutorTest, AddToKernelLauncherListDvpp)
{
op::Shape selfShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
op::Shape idxShape{33, 15, 14, 48};
auto self = std::make_unique<aclTensor>(selfShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto out = std::make_unique<aclTensor>(outShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto idx = std::make_unique<aclTensor>(idxShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);
const aclTensor* wsArr[] = {out.get(), idx.get()};
uint32_t opType = op::OpTypeDict::ToOpType("Sort");
int64_t dim = 0;
bool descending = true;
std::string apiName = "aclnnArgsort";
auto input = OP_INPUT(self.get());
auto output = OP_OUTPUT(out.get(), idx.get());
auto attr = OP_ATTR(dim, descending);
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
thread_local uint64_t kernelLaunchIdDefinedInL0Dfx = op::internal::GenKernelLauncherId("Sort");
op::internal::ProfilingInfoId profilingInfoId(0, kernelLaunchIdDefinedInL0Dfx, 0);
op::internal::GetLauncherCtx().ClearTilingCache();
aclTensorList* workspace = nullptr;
auto ctx = op::MakeOpArgContext(input, output, attr);
auto* launcher = new op::AiCoreKernelLauncher{opType, op::AI_CORE, profilingInfoId, executor, ctx};
launcher->SaveLaunchCtx(std::move(op::internal::GetLauncherCtx()));
auto ret = executor->AddToKernelLauncherListDvpp(opType, launcher, ctx);
EXPECT_EQ(ret, ACL_SUCCESS);
}
TEST_F(OpExecutorTest, AddToKernelLauncherListAiCpu)
{
op::Shape selfShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
op::Shape idxShape{33, 15, 14, 48};
auto self = std::make_unique<aclTensor>(selfShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto out = std::make_unique<aclTensor>(outShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto idx = std::make_unique<aclTensor>(idxShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);
const aclTensor* wsArr[] = {out.get(), idx.get()};
uint32_t opType = op::OpTypeDict::ToOpType("Sort");
int64_t dim = 0;
bool descending = true;
std::string apiName = "aclnnArgsort";
auto input = OP_INPUT(self.get());
auto output = OP_OUTPUT(out.get(), idx.get());
auto attr = OP_ATTR(dim, descending);
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
thread_local uint64_t kernelLaunchIdDefinedInL0Dfx = op::internal::GenKernelLauncherId("Sort");
op::internal::ProfilingInfoId profilingInfoId(0, kernelLaunchIdDefinedInL0Dfx, 0);
op::internal::GetLauncherCtx().ClearTilingCache();
aclTensorList* workspace = nullptr;
auto ctx = op::MakeOpArgContext(input, output, attr);
auto* launcher = new op::AiCoreKernelLauncher{opType, op::AI_CORE, profilingInfoId, executor, ctx};
launcher->SaveLaunchCtx(std::move(op::internal::GetLauncherCtx()));
auto ret = executor->AddToKernelLauncherListAiCpu(opType, launcher, ctx);
EXPECT_EQ(ret, ACL_SUCCESS);
}
TEST_F(OpExecutorTest, AddToKernelLauncherListCopyTask)
{
op::Shape selfShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
op::Shape idxShape{33, 15, 14, 48};
auto self = std::make_unique<aclTensor>(selfShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto out = std::make_unique<aclTensor>(outShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto idx = std::make_unique<aclTensor>(idxShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);
const aclTensor* wsArr[] = {out.get(), idx.get()};
uint32_t opType = op::OpTypeDict::ToOpType("Sort");
int64_t dim = 0;
bool descending = true;
std::string apiName = "aclnnArgsort";
auto input = OP_INPUT(self.get());
auto output = OP_OUTPUT(out.get(), idx.get());
auto attr = OP_ATTR(dim, descending);
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
thread_local uint64_t kernelLaunchIdDefinedInL0Dfx = op::internal::GenKernelLauncherId("Sort");
op::internal::ProfilingInfoId profilingInfoId(0, kernelLaunchIdDefinedInL0Dfx, 0);
op::internal::GetLauncherCtx().ClearTilingCache();
aclTensorList* workspace = nullptr;
auto ctx = op::MakeOpArgContext(input, output, attr);
auto* launcher = new op::AiCoreKernelLauncher{opType, op::AI_CORE, profilingInfoId, executor, ctx};
launcher->SaveLaunchCtx(std::move(op::internal::GetLauncherCtx()));
auto ret1 = executor->AddToKernelLauncherListCopyTask(opType, launcher, *ctx->GetOpArg(op::OpArgDef::OP_OPTION_ARG),
*ctx->GetOpArg(op::OpArgDef::OP_OUTPUT_ARG),
*ctx->GetOpArg(op::OpArgDef::OP_ATTR_ARG));
EXPECT_EQ(ret1, ACL_SUCCESS);
}
TEST_F(OpExecutorTest, apiTest)
{
op::Shape selfShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
op::Shape idxShape{33, 15, 14, 48};
auto self = std::make_unique<aclTensor>(selfShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto out = std::make_unique<aclTensor>(outShape, op::DataType::DT_FLOAT16, op::Format::FORMAT_ND, nullptr);
auto idx = std::make_unique<aclTensor>(idxShape, op::DataType::DT_INT32, op::Format::FORMAT_ND, nullptr);
const aclTensor* wsArr[] = {out.get(), idx.get()};
uint32_t opType = op::OpTypeDict::ToOpType("Sort");
int64_t dim = 0;
bool descending = true;
std::string apiName = "aclnnArgsort";
auto input = OP_INPUT(self.get());
auto output = OP_OUTPUT(out.get(), idx.get());
auto attr = OP_ATTR(dim, descending);
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
executor->AddTensorRelation(self.get(), out.get());
op::fp16_t fp16Value = 3.2;
aclScalar* scalar1 = executor->AllocScalar(fp16Value);
EXPECT_NE(scalar1, nullptr);
op::bfloat16 bfp16Value = 3.2;
aclScalar* scalar2 = executor->AllocScalar(bfp16Value);
EXPECT_NE(scalar2, nullptr);
double doubleValue = 3.2;
aclScalar* scalar3 = executor->AllocScalar(doubleValue);
EXPECT_NE(scalar3, nullptr);
int32_t intValue = 3;
aclScalar* intScalar = executor->AllocScalar(intValue);
int64_t int64Value = 3;
aclScalar* int64Scalar = executor->AllocScalar(int64Value);
int16_t int16Value = 3;
aclScalar* int16Scalar = executor->AllocScalar(int16Value);
int8_t int8Value = 3;
aclScalar* int8Scalar = executor->AllocScalar(int8Value);
uint32_t uint32Value = 3;
aclScalar* uint32Scalar = executor->AllocScalar(uint32Value);
uint64_t uint64Value = 3;
aclScalar* uint64Scalar = executor->AllocScalar(uint64Value);
uint16_t uint16Value = 3;
aclScalar* uint16Scalar = executor->AllocScalar(uint16Value);
uint8_t uint8Value = 3;
aclScalar* uint8Scalar = executor->AllocScalar(uint8Value);
bool boolValue = true;
aclScalar* boolScalar = executor->AllocScalar(boolValue);
}
TEST_F(OpExecutorTest, apiOtherTest)
{
char* apiName = "aclnnArgsort";
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
InitL2Phase1Context(apiName, &executor);
InitL2Phase2Context(apiName, executor);
op::Shape selfShape{33, 15, 1, 48};
op::Shape otherShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
auto self = new aclTensor(selfShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
auto other = new aclTensor(otherShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
float alpha = 13.37;
auto out = new aclTensor(outShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
auto input = OP_INPUT(self, other);
auto output = OP_OUTPUT(out);
auto attr = OP_ATTR(alpha);
auto ws = OP_WORKSPACE(out);
auto ctx = op::MakeOpArgContext(input, output, attr, ws);
uint32_t opType = op::OpTypeDict::ToOpType("Axpy");
auto ret = CreatAiCoreKernelLauncher("Axpy", opType, executor, ctx);
EXPECT_EQ(ret, ACL_SUCCESS);
op::OpArgContext* ctx2 = nullptr;
CreatDSAKernelLauncher("Axpy", opType, DSARandomNormalTaskType, executor, ctx2);
auto ctx1 = op::MakeOpArgContext(input, output, attr);
ret = CreatAiCoreKernelLauncher("Axpy", opType, executor, ctx1);
EXPECT_EQ(ret, ACL_SUCCESS);
ret = InferShape(opType, *ctx->GetOpArg(op::OpArgDef::OP_OPTION_ARG), *ctx->GetOpArg(op::OpArgDef::OP_OUTPUT_ARG),
*ctx->GetOpArg(op::OpArgDef::OP_ATTR_ARG));
EXPECT_EQ(ret, ACLNN_SUCCESS);
}
TEST_F(OpExecutorTest, DSAKernelLauncherUnNormalTaskType)
{
char* apiName = "aclnnArgsort";
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = uniqueExecutor.get();
InitL2Phase1Context(apiName, &executor);
InitL2Phase2Context(apiName, executor);
op::Shape selfShape{33, 15, 1, 48};
op::Shape otherShape{33, 15, 14, 48};
op::Shape outShape{33, 15, 14, 48};
auto self = new aclTensor(selfShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
auto other = new aclTensor(otherShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
float alpha = 13.37;
auto out = new aclTensor(outShape, op::DataType::DT_FLOAT, op::Format::FORMAT_ND, nullptr);
auto input = OP_INPUT(self, other);
auto output = OP_OUTPUT(out);
auto attr = OP_ATTR(alpha);
auto ws = OP_WORKSPACE(out);
auto ctx = op::MakeOpArgContext(input, output, attr, ws);
uint32_t opType = op::OpTypeDict::ToOpType("Axpy");
DSA_TASK_TYPE unNormalDsaTaskType = static_cast<DSA_TASK_TYPE>(100);
CreatDSAKernelLauncher("Axpy", opType, unNormalDsaTaskType, executor, ctx);
aclnnStatus ret = InferShape(opType, *ctx->GetOpArg(op::OpArgDef::OP_OPTION_ARG),
*ctx->GetOpArg(op::OpArgDef::OP_OUTPUT_ARG),
*ctx->GetOpArg(op::OpArgDef::OP_ATTR_ARG));
EXPECT_EQ(ret, ACLNN_SUCCESS);
}
TEST_F(OpExecutorTest, phase2ParamCheckTest)
{
char* apiName = "aclnnArgsort";
aclrtStream stream = nullptr;
InitL2Phase2Context(apiName, nullptr);
int* ptr = new int;
auto ret = CommonOpExecutorRun(ptr, 500, nullptr, stream);
EXPECT_EQ(ret, ACLNN_ERR_PARAM_NULLPTR);
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = nullptr;
uniqueExecutor.ReleaseTo(&executor);
ret = CommonOpExecutorRun(nullptr, 500, executor, stream);
EXPECT_EQ(ret, ACLNN_ERR_PARAM_NULLPTR);
auto opExecCache = new OpExecCache();
OpExecCacheWrap* cacheWrap = CreateCacheWrap(opExecCache);
executor = reinterpret_cast<aclOpExecutor*>(cacheWrap);
ret = CommonOpExecutorRun(nullptr, 500, executor, stream);
EXPECT_EQ(ret, ACLNN_ERR_PARAM_NULLPTR);
delete opExecCache;
delete ptr;
}
TEST_F(OpExecutorTest, phase2ParamCheckSuccessTest)
{
aclrtStream stream = nullptr;
auto uniqueExecutor = CREATE_EXECUTOR();
aclOpExecutor* executor = nullptr;
uniqueExecutor.ReleaseTo(&executor);
auto ret = CommonOpExecutorRun(nullptr, 0, executor, stream);
EXPECT_EQ(ret, ACLNN_SUCCESS);
}
TEST_F(OpExecutorTest, phase1ParamCheckTest)
{
uint64_t workspaceSize;
aclOpExecutor** executor = nullptr;
auto ret = CheckPhase1Params(executor, &workspaceSize);
EXPECT_EQ(ret, ACLNN_ERR_PARAM_NULLPTR);
int* ptr = new int;
executor = reinterpret_cast<aclOpExecutor**>(ptr);
ret = CheckPhase1Params(executor, nullptr);
EXPECT_EQ(ret, ACLNN_ERR_PARAM_NULLPTR);
ret = CheckPhase1Params(executor, &workspaceSize);
EXPECT_EQ(ret, ACLNN_SUCCESS);
delete ptr;
}
static void L2Phase2Func(const char* apiName, aclOpExecutor* executor, const int64_t configVal)
{
std::cout << "phase2 thread id: " << std::this_thread::get_id() << std::endl;
InitL2Phase2Context(apiName, executor);
EXPECT_EQ(executor->GetOpConfigInfo().isDeterministicOn_, (configVal == 1));
EXPECT_EQ(op::internal::GetThreadLocalContext().opConfigInfo_.isDeterministicOn_, (configVal == 1));
}
TEST_F(OpExecutorTest, DeterministicTest)
{
std::cout << "phase1 thread id: " << std::this_thread::get_id() << std::endl;
const char* apiName = "aclnnArgsort";
int64_t deterministicValue = 0;
aclrtGetSysParamOpt(ACL_OPT_DETERMINISTIC, &deterministicValue);
std::cout << "deterministic value: " << deterministicValue << std::endl;
aclOpExecutor* executor = nullptr;
InitL2Phase1Context(apiName, &executor);
EXPECT_EQ(op::internal::GetThreadLocalContext().opConfigInfo_.isDeterministicOn_, (deterministicValue == 1));
auto uniqueExecutor = CREATE_EXECUTOR();
uniqueExecutor.ReleaseTo(&executor);
EXPECT_EQ(executor->GetOpConfigInfo().isDeterministicOn_, (deterministicValue == 1));
std::thread t(L2Phase2Func, apiName, executor, deterministicValue);
t.join();
delete executor;
executor = nullptr;
deterministicValue = (deterministicValue == 0) ? 1 : 0;
aclrtSetSysParamOpt(ACL_OPT_DETERMINISTIC, deterministicValue);
InitL2Phase1Context(apiName, &executor);
EXPECT_EQ(op::internal::GetThreadLocalContext().opConfigInfo_.isDeterministicOn_, (deterministicValue == 1));
auto uniqueExecutor2 = CREATE_EXECUTOR();
uniqueExecutor2.ReleaseTo(&executor);
EXPECT_EQ(executor->GetOpConfigInfo().isDeterministicOn_, (deterministicValue == 1));
std::thread t2(L2Phase2Func, apiName, executor, deterministicValue);
t2.join();
delete executor;
deterministicValue = (deterministicValue == 0) ? 1 : 0;
aclrtSetSysParamOpt(ACL_OPT_DETERMINISTIC, deterministicValue);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat8E4M3FN)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E4M3FN);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E4M3FN);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E4M3FN result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat8E5M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E5M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E5M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E5M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat8E8M0)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E8M0);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E8M0);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E8M0 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat6E3M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT6_E3M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT6_E3M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float6E3M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat6E2M3)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT6_E2M3);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT6_E2M3);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float6E2M3 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat4E2M1)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E2M1);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT4_E2M1);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E2M1 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarToFloat4E1M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E1M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT4_E1M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E1M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat8E4M3FN)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E4M3FN);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E4M3FN);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E4M3FN result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat8E5M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E5M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E5M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E5M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat8E8M0)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E8M0);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT8_E8M0);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E8M0 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat6E3M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT6_E3M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT6_E3M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float6E3M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat6E2M3)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT6_E2M3);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT6_E2M3);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float6E2M3 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat4E2M1)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E2M1);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT4_E2M1);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E2M1 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarToFloat4E1M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 1.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E1M2);
EXPECT_NE(tensor, nullptr);
EXPECT_EQ(tensor->GetDataType(), op::DataType::DT_FLOAT4_E1M2);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E1M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarNegativeToFloat8E4M3FN)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = -1.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E4M3FN);
EXPECT_NE(tensor, nullptr);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E4M3FN result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), -1.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarZeroToFloat8E5M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 0.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT8_E5M2);
EXPECT_NE(tensor, nullptr);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float8E5M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 0.0f);
}
TEST_F(OpExecutorTest, ConvertFloatScalarOverflowToFloat4E1M2)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
float fValue = 2.0f;
auto scalar = executor.AllocScalar(fValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E1M2);
EXPECT_NE(tensor, nullptr);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E1M2 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 1.75f);
}
TEST_F(OpExecutorTest, ConvertDoubleScalarOverflowToFloat4E2M1)
{
op::internal::GetThreadLocalContext().cacheHasFull_ = true;
aclOpExecutor executor;
double dValue = 10.0;
auto scalar = executor.AllocScalar(dValue);
ASSERT_NE(scalar, nullptr);
auto tensor = executor.ConvertToTensor(scalar, op::DataType::DT_FLOAT4_E2M1);
EXPECT_NE(tensor, nullptr);
auto* data = static_cast<uint8_t*>(tensor->GetStorageAddr());
op::Float4E2M1 result;
result.value = *data;
EXPECT_FLOAT_EQ(static_cast<float>(result), 6.0f);
}