* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is part of the MindStudio project.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*/
#include <vector>
#include "acl/acl.h"
#include "acl/acl_prof.h"
#include "aclnnop/aclnn_add.h"
#include "common/helper_mspti.h"
#include "common/util_acl.h"
#include "mspti.h"
/**
 * Computes the total number of elements described by a tensor shape.
 *
 * @param shape  Extent of each dimension.
 * @return Product of all extents; 1 when `shape` is empty.
 */
int64_t GetShapeSize(const std::vector<int64_t>& shape)
{
    int64_t elementCount = 1;
    // Fold every dimension extent into the running product.
    for (auto dim = shape.begin(); dim != shape.end(); ++dim) {
        elementCount = elementCount * (*dim);
    }
    return elementCount;
}
template <typename T>
int CreateAclTensor(const std::vector<T>& hostData, const std::vector<int64_t>& shape, void** deviceAddr,
aclDataType dataType, aclTensor** tensor)
{
auto size = GetShapeSize(shape) * sizeof(T);
ACL_CALL(aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST));
ACL_CALL(aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE));
std::vector<int64_t> strides(shape.size(), 1);
for (int64_t i = shape.size() - 2; i >= 0; i--) {
strides[i] = shape[i + 1] * strides[i + 1];
}
*tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND,
shape.data(), shape.size(), *deviceAddr);
return 0;
}
/**
 * Runs one aclnnAdd on two 4x2 float tensors with a scalar alpha of 1.2, waits for the stream,
 * copies the output back to the host and prints every element.
 * (Per the aclnnAdd API this presumably computes out = self + alpha * other — confirm against the
 * operator documentation.)
 *
 * @param context  Not used by this function; kept so the signature stays unchanged for callers.
 * @param stream   Stream on which the operator is launched and then synchronized.
 * @return 0 on success; a non-zero value when scalar creation fails or when a call wrapped in
 *         ACL_CALL fails (presumably ACL_CALL returns the error code — see common/util_acl.h).
 */
int DoAclAdd(aclrtContext context, aclrtStream stream)
{
    (void)context;

    // Owns every ACL handle and device buffer created below. Its destructor runs on every exit
    // path, so an early return no longer leaks what was already created.
    struct AddResources {
        void* selfDeviceAddr = nullptr;
        void* otherDeviceAddr = nullptr;
        void* outDeviceAddr = nullptr;
        void* workspaceAddr = nullptr;
        aclTensor* self = nullptr;
        aclTensor* other = nullptr;
        aclScalar* alpha = nullptr;
        aclTensor* out = nullptr;

        ~AddResources()
        {
            // Same release order as before: handles first, then the device buffers.
            if (self != nullptr) {
                aclDestroyTensor(self);
            }
            if (other != nullptr) {
                aclDestroyTensor(other);
            }
            if (alpha != nullptr) {
                aclDestroyScalar(alpha);
            }
            if (out != nullptr) {
                aclDestroyTensor(out);
            }
            if (selfDeviceAddr != nullptr) {
                aclrtFree(selfDeviceAddr);
            }
            if (otherDeviceAddr != nullptr) {
                aclrtFree(otherDeviceAddr);
            }
            if (outDeviceAddr != nullptr) {
                aclrtFree(outDeviceAddr);
            }
            if (workspaceAddr != nullptr) {
                aclrtFree(workspaceAddr);
            }
        }
    } res;

    const std::vector<int64_t> selfShape = {4, 2};
    const std::vector<int64_t> otherShape = {4, 2};
    const std::vector<int64_t> outShape = {4, 2};
    const std::vector<float> selfHostData = {0, 1, 2, 3, 4, 5, 6, 7};
    const std::vector<float> otherHostData = {1, 1, 1, 2, 2, 2, 3, 3};
    const std::vector<float> outHostData = {0, 0, 0, 0, 0, 0, 0, 0};
    float alphaValue = 1.2f;

    // Build the inputs, the scalar multiplier and the output tensor.
    ACL_CALL(CreateAclTensor(selfHostData, selfShape, &res.selfDeviceAddr, aclDataType::ACL_FLOAT, &res.self));
    ACL_CALL(CreateAclTensor(otherHostData, otherShape, &res.otherDeviceAddr, aclDataType::ACL_FLOAT, &res.other));
    res.alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
    if (res.alpha == nullptr) {
        // Previously this path returned ACL_SUCCESS; report the failure instead.
        LOG_PRINT("%s\n", "aclCreateScalar failed: returned nullptr");
        return -1;
    }
    ACL_CALL(CreateAclTensor(outHostData, outShape, &res.outDeviceAddr, aclDataType::ACL_FLOAT, &res.out));

    // Two-phase aclnn call: query the workspace size and executor, then launch on the stream.
    uint64_t workspaceSize = 0;
    aclOpExecutor* executor = nullptr;
    ACL_CALL(aclnnAddGetWorkspaceSize(res.self, res.other, res.alpha, res.out, &workspaceSize, &executor));
    if (workspaceSize > 0) {
        ACL_CALL(aclrtMalloc(&res.workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST));
    }
    ACL_CALL(aclnnAdd(res.workspaceAddr, workspaceSize, executor, stream));
    ACL_CALL(aclrtSynchronizeStream(stream));

    // Copy the result back to the host and print it.
    const auto size = GetShapeSize(outShape);
    std::vector<float> resultData(size, 0);
    ACL_CALL(aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), res.outDeviceAddr,
                         size * sizeof(float), ACL_MEMCPY_DEVICE_TO_HOST));
    for (int64_t i = 0; i < size; i++) {
        // Cast so the argument matches %ld even where int64_t is not `long`.
        LOG_PRINT("result[%ld] is: %f\n", static_cast<long>(i), resultData[i]);
    }
    return 0;
}
/**
 * Initializes MSPTI through the InitMspti helper and enables the activity kinds this sample
 * records: kernel, API, memcpy, memory, memset and runtime API.
 *
 * Each kind is enabled exactly once; the earlier version enabled MSPTI_ACTIVITY_KIND_API twice.
 * NOTE(review): the meaning of the two nullptr arguments to InitMspti is defined in
 * common/helper_mspti.h and is not visible here — confirm there.
 * NOTE(review): the results of the msptiActivityEnable calls are not inspected.
 */
void SetUpMspti()
{
    InitMspti(nullptr, nullptr);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_KERNEL);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_API);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_MEMCPY);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_MEMORY);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_MEMSET);
    msptiActivityEnable(MSPTI_ACTIVITY_KIND_RUNTIME_API);
}
/**
 * Sample entry point: sets up device 0, enables MSPTI activity collection, runs the add
 * operator once, then tears everything down in the same order as before.
 *
 * @return 0 when DoAclAdd succeeds, 1 otherwise, so a failed operator run is visible in the
 *         process exit status.
 */
int main()
{
    int32_t deviceId = 0;
    // Start from null handles so nothing indeterminate is passed on if Init does not fill them.
    aclrtContext context = nullptr;
    aclrtStream stream = nullptr;
    // NOTE(review): Init's return type is not visible in this file; if it reports errors,
    // they should be checked here as well.
    Init(deviceId, &context, &stream);
    SetUpMspti();

    const int addStatus = DoAclAdd(context, stream);

    // Always tear down, even when the operator failed.
    DeInit(deviceId, &context, &stream);
    DeInitMspti();

    // Collapse to 0/1: raw ACL error codes could be truncated to 0 by the OS exit status.
    return (addStatus == 0) ? 0 : 1;
}