* Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "launch_kernel.h"
#include "common.h"
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include "acl/acl.h"
namespace ops_hccl_allgather {
static bool g_init = false;
static std::mutex g_mut;
static aclrtBinHandle g_binHandle = nullptr;
static aclrtFuncHandle g_funcHandle = nullptr;
const std::string AIV_BINARY_NAME = "hccl_custom_allgather_kernels.o";
const std::string KERNEL_NAME = "HcclAllGatherAivKernel";
static HcclResult LoadBinaryFromFile(const std::string& fileName, void*& buffer, size_t& length)
{
std::ifstream file(fileName, std::ios::binary | std::ios::ate);
if (!file.is_open()) {
HCCL_ERROR("[LoadBinaryFromFile] Failed to open file: %s", fileName.c_str());
return HCCL_E_NOT_FOUND;
}
length = file.tellg();
file.seekg(0, std::ios::beg);
buffer = new(std::nothrow) char[length];
if (buffer == nullptr) {
HCCL_ERROR("[LoadBinaryFromFile] Failed to allocate memory for binary, size: %zu", length);
return HCCL_E_INTERNAL;
}
if (!file.read(static_cast<char*>(buffer), length)) {
HCCL_ERROR("[LoadBinaryFromFile] Failed to read file: %s", fileName.c_str());
delete[] static_cast<char*>(buffer);
buffer = nullptr;
return HCCL_E_INTERNAL;
}
return HCCL_SUCCESS;
}
HcclResult RegisterKernel()
{
std::lock_guard<std::mutex> guard(g_mut);
if (g_init) {
return HCCL_SUCCESS;
}
std::string binPath = AIV_BINARY_NAME;
HCCL_INFO("[RegisterKernel] Binary path: %s", binPath.c_str());
void* binBuffer = nullptr;
size_t binSize = 0;
CHK_RET(LoadBinaryFromFile(binPath, binBuffer, binSize));
HCCL_INFO("[RegisterKernel] Binary loaded. Size: %zu", binSize);
aclrtBinary binary = aclrtCreateBinary(binBuffer, binSize);
if (binary == nullptr) {
HCCL_ERROR("[RegisterKernel] aclrtCreateBinary failed");
delete[] static_cast<char*>(binBuffer);
return HCCL_E_INTERNAL;
}
aclError aclRet = aclrtBinaryLoad(binary, &g_binHandle);
if (aclRet != ACL_SUCCESS) {
HCCL_ERROR("[RegisterKernel] aclrtBinaryLoad failed, ret: %d", aclRet);
aclrtDestroyBinary(binary);
delete[] static_cast<char*>(binBuffer);
return HCCL_E_INTERNAL;
}
aclrtDestroyBinary(binary);
aclRet = aclrtBinaryGetFunction(g_binHandle, KERNEL_NAME.c_str(), &g_funcHandle);
if (aclRet != ACL_SUCCESS) {
HCCL_ERROR("[RegisterKernel] aclrtBinaryGetFunction failed for %s, ret: %d", KERNEL_NAME.c_str(), aclRet);
return HCCL_E_INTERNAL;
}
g_init = true;
HCCL_INFO("[RegisterKernel] Success. Function handle: %p", g_funcHandle);
return HCCL_SUCCESS;
}
HcclResult ExecuteKernelLaunch(const OpParam ¶m, aclrtStream stream)
{
if (!g_init) {
CHK_RET(RegisterKernel());
}
aclrtLaunchKernelCfg cfg;
aclrtLaunchKernelAttr attr[3];
attr[0].id = ACL_RT_LAUNCH_KERNEL_ATTR_SCHEM_MODE;
attr[0].value.schemMode = 1;
attr[1].id = ACL_RT_LAUNCH_KERNEL_ATTR_TIMEOUT_US;
attr[1].value.timeoutUs.timeoutLow = CUSTOM_TIMEOUT * 1000000;
attr[1].value.timeoutUs.timeoutHigh = 0;
attr[2].id = ACL_RT_LAUNCH_KERNEL_ATTR_ENGINE_TYPE;
attr[2].value.engineType = ACL_RT_ENGINE_TYPE_AIV;
cfg.numAttrs = 3;
cfg.attrs = attr;
HCCL_INFO("[ExecuteKernelLaunch] Invoking aclrtLaunchKernelWithHostArgs...");
aclError aclRet = aclrtLaunchKernelWithHostArgs(g_funcHandle, 1, stream, &cfg,
const_cast<OpParam*>(¶m), sizeof(OpParam),
nullptr, 0);
if (aclRet != ACL_SUCCESS) {
HCCL_ERROR("[ExecuteKernelLaunch] aclrtLaunchKernelWithHostArgs failed, ret: %d", aclRet);
return HCCL_E_INTERNAL;
}
HCCL_INFO("[ExecuteKernelLaunch] Launch command submitted successfully.");
return HCCL_SUCCESS;
}
HcclResult LaunchKernel(OpParam ¶m, aclrtStream stream) {
return ExecuteKernelLaunch(param, stream);
}
}