* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*/
#ifndef RUNETIME_HOOKS_H
#define RUNETIME_HOOKS_H
#include <cstdint>
#include <cstddef>
#include <vector>
#include <map>
#include <string>
#include <fstream>
#include <cstring>
#include <dlfcn.h>
#include "vallina_symbol.h"
#include "acl_hooks.h"
namespace MemScope {
constexpr uint64_t MAX_BINARY_SIZE = 32ULL * 1024 * 1024 * 1024;
struct RuntimeLibLoader {
static void *Load(void)
{
return LibLoad("libruntime.so");
}
};
struct ACLImplLibLoader {
static void *Load(void)
{
void* handle = LibLoad("libascendcl_impl.so");
if (handle != nullptr) {
return handle;
}
return LibLoad("libacl_rt_impl.so");
}
};
const void* GetHandleByStubFunc(const void *stubFunc);
}
#ifdef __cplusplus
extern "C" {
#endif
#ifndef RTS_API
#define RTS_API
#endif
using aclrtBinary = void*;
using aclrtBinHandle = void*;
using aclrtFuncHandle = void*;
using aclrtArgsHandle = void*;
using aclrtParamHandle = void*;
typedef enum {
ACL_RT_ENGINE_TYPE_AIC = 0,
ACL_RT_ENGINE_TYPE_AIV,
} aclrtEngineType;
typedef enum aclrtLaunchKernelAttrId {
ACL_RT_LAUNCH_KERNEL_ATTR_SCHEM_MODE = 1,
ACL_RT_LAUNCH_KERNEL_ATTR_ENGINE_TYPE = 3,
ACL_RT_LAUNCH_KERNEL_ATTR_BLOCKDIM_OFFSET,
ACL_RT_LAUNCH_KERNEL_ATTR_BLOCK_TASK_PREFETCH,
ACL_RT_LAUNCH_KERNEL_ATTR_DATA_DUMP,
ACL_RT_LAUNCH_KERNEL_ATTR_TIMEOUT,
} aclrtLaunchKernelAttrId;
typedef union aclrtLaunchKernelAttrValue {
uint8_t schemMode;
uint32_t localMemorySize;
aclrtEngineType engineType;
uint32_t blockDimOffset;
uint8_t isBlockTaskPrefetch;
uint8_t isDataDump;
uint16_t timeout;
uint32_t rsv[4];
} aclrtLaunchKernelAttrValue;
typedef struct aclrtLaunchKernelAttr {
aclrtLaunchKernelAttrId id;
aclrtLaunchKernelAttrValue value;
} aclrtLaunchKernelAttr;
typedef struct aclrtLaunchKernelCfg {
aclrtLaunchKernelAttr *attrs;
size_t numAttrs;
} aclrtLaunchKernelCfg;
typedef struct aclrtPlaceHolderInfo {
uint32_t addrOffset;
uint32_t dataOffset;
} aclrtPlaceHolderInfo;
typedef enum tagRtError {
RT_ERROR_NONE = 0x0,
RT_ERROR_INVALID_VALUE = 0x1,
RT_ERROR_MEMORY_ALLOCATION = 0x2,
RT_ERROR_INVALID_RESOURCE_HANDLE = 0x3,
RT_ERROR_INVALID_DEVICE_POINTER = 0x4,
RT_ERROR_INVALID_MEMCPY_DIRECTION = 0x5,
RT_ERROR_INVALID_DEVICE = 0x6,
RT_ERROR_NO_DEVICE = 0x7,
RT_ERROR_CMD_OCCUPY_FAILURE = 0x8,
RT_ERROR_SET_SIGNAL_FAILURE = 0x9,
RT_ERROR_UNSET_SIGNAL_FAILURE = 0xA,
RT_ERROR_OPEN_FILE_FAILURE = 0xB,
RT_ERROR_WRITE_FILE_FAILURE = 0xC,
RT_ERROR_MEMORY_ADDRESS_UNALIGNED = 0xD,
RT_ERROR_DRV_ERR = 0xE,
RT_ERROR_LOST_HEARTBEAT = 0xF,
RT_ERROR_REPORT_TIMEOUT = 0x10,
RT_ERROR_NOT_READY = 0x11,
RT_ERROR_DATA_OPERATION_FAIL = 0x12,
RT_ERROR_INVALID_L2_INSTR_SIZE = 0x13,
RT_ERROR_DEVICE_PROC_HANG_OUT = 0x14,
RT_ERROR_DEVICE_POWER_UP_FAIL = 0x15,
RT_ERROR_DEVICE_POWER_DOWN_FAIL = 0x16,
RT_ERROR_FEATURE_NOT_SUPPROT = 0x17,
RT_ERROR_KERNEL_DUPLICATE = 0x18,
RT_ERROR_MODEL_STREAM_EXE_FAILED = 0x91,
RT_ERROR_MODEL_LOAD_FAILED = 0x94,
RT_ERROR_END_OF_SEQUENCE = 0x95,
RT_ERROR_NO_STREAM_CB_REG = 0x96,
RT_ERROR_DATA_DUMP_LOAD_FAILED = 0x97,
RT_ERROR_CALLBACK_THREAD_UNSUBSTRIBE = 0x98,
RT_ERROR_RESERVED
} rtError_t;
typedef void *rtStream_t;
typedef uint32_t rtMemType_t;
typedef struct tagRtSmData {
uint64_t L2_mirror_addr;
uint32_t L2_data_section_size;
uint8_t L2_preload;
uint8_t modified;
uint8_t priority;
int8_t prev_L2_page_offset_base;
uint8_t L2_page_offset_base;
uint8_t L2_load_to_ddr;
uint8_t reserved[2];
} rtSmData_t;
typedef struct tagRtSmCtrl {
rtSmData_t data[8];
uint64_t size;
uint8_t remap[64];
array index: virtual l2 page id, array value: physic l2 page id */
uint8_t l2_in_main;
uint8_t reserved[3];
} rtSmDesc_t;
typedef struct rtHostInputInfo {
uint16_t addrOffset;
uint16_t dataOffset;
} rtHostInputInfo_t;
typedef struct tagRtArgsEx {
void *args;
rtHostInputInfo_t *hostInputInfoPtr;
uint32_t argsSize;
uint16_t tilingAddrOffset;
uint16_t tilingDataOffset;
uint16_t hostInputInfoNum;
uint8_t hasTiling;
uint8_t isNoNeedH2DCopy;
uint8_t reserved[4];
} rtArgsEx_t;
using rtFuncHandle = void *;
using rtLaunchArgsHandle = void *;
using RtStreamT = void *;
using RtSmDescT = void *;
typedef struct tagRtTaskCfgInfo {
uint8_t qos;
uint8_t partId;
uint8_t schemMode;
uint8_t res[1];
} rtTaskCfgInfo_t;
typedef struct tagRtTaskCfgInfoByHandle {
uint8_t qos;
uint8_t partId;
uint8_t schemMode;
bool d2dCrossFlag;
uint32_t blockDimOffset;
uint8_t dumpflag;
} RtTaskCfgInfoT;
typedef struct tagRtAicpuArgsEx {
void *args;
rtHostInputInfo_t *hostInputInfoPtr;
rtHostInputInfo_t *kernelOffsetInfoPtr;
uint32_t argsSize;
uint16_t hostInputInfoNum;
uint16_t kernelOffsetInfoNum;
uint32_t soNameAddrOffset;
uint32_t kernelNameAddrOffset;
bool isNoNeedH2DCopy;
uint8_t reserved[3];
} RtAicpuArgsExT;
RTS_API rtError_t rtKernelLaunch(
const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm);
RTS_API rtError_t rtKernelLaunchWithHandleV2(void *hdl, const uint64_t tilingKey, uint32_t blockDim,
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const rtTaskCfgInfo_t *cfgInfo);
RTS_API rtError_t rtKernelLaunchWithFlagV2(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo,
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags, const rtTaskCfgInfo_t *cfgInfo);
RTS_API rtError_t rtAicpuKernelLaunchExWithArgs(const uint32_t kernelType, const char* const opName,
const uint32_t blockDim, const RtAicpuArgsExT *argsInfo, RtSmDescT * const smDesc, const RtStreamT stm,
const uint32_t flags);
RTS_API rtError_t rtLaunchKernelByFuncHandle(rtFuncHandle funcHandle, uint32_t blockDim,
rtLaunchArgsHandle argsHandle, RtStreamT stm);
RTS_API rtError_t rtLaunchKernelByFuncHandleV2(rtFuncHandle funcHandle, uint32_t blockDim,
rtLaunchArgsHandle argsHandle, RtStreamT stm, const RtTaskCfgInfoT *cfgInfo);
RTS_API aclError aclrtLaunchKernelImpl(aclrtFuncHandle funcHandle, uint32_t blockDim, const void *argsData, size_t argsSize, aclrtStream stream);
RTS_API aclError aclrtLaunchKernelWithConfigImpl(aclrtFuncHandle funcHandle, uint32_t blockDim, aclrtStream stream, aclrtLaunchKernelCfg *cfg, aclrtArgsHandle argsHandle, void *reserve);
RTS_API aclError aclrtLaunchKernelV2Impl(aclrtFuncHandle funcHandle, uint32_t blockDim, const void *argsData, size_t argsSize, aclrtLaunchKernelCfg *cfg, aclrtStream stream);
RTS_API aclError aclrtLaunchKernelWithHostArgsImpl(aclrtFuncHandle funcHandle, uint32_t blockDim, aclrtStream stream, aclrtLaunchKernelCfg *cfg,
void *hostArgs, size_t argsSize, aclrtPlaceHolderInfo *placeHolderArray, size_t placeHolderNum);
#ifdef __cplusplus
}
#endif
#endif