/* -------------------------------------------------------------------------
* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* ------------------------------------------------------------------------- */
#pragma once
#include "runtime.h"
#include <cstdint>
#include "core/PlatformConfig.h"
#include "utils/Struct.h"
/*
* 之间进行通信的协议。协议的基本形式由协议头和协议载荷组成:
* <PacketHead> -- <PacketPayload>
*/
/*
 * Protocol-wide limits and defaults.
 * 64-bit values carry an explicit ULL suffix and byte sizes are written as
 * shifts so the magnitude (MiB) is visible at the definition.
 */

/* Record buffer sizing. */
constexpr uint64_t MAX_BLOCKDIM_NUMS = 100ULL;
constexpr uint64_t RECORD_BUF_SIZE_EACH_BLOCK = 100ULL << 20;  // 100 MiB
/* Default value of RecordGlobalHeadImpl::securityVal. */
constexpr uint64_t RECORD_HEAD_SECURITY_VALUE = 0x5A5A5A5A5A5A5A5AULL;

/* Defaults used by CheckParmsInfo and SanitizerConfig. */
constexpr int16_t CHECK_ALL_BLOCK = -1;        // default checkBlockId
constexpr uint32_t DEFAULT_CACHE_SIZE = 100U;  // default cacheSize
constexpr uint32_t GM_BUFFER_GUARD_DFT_SIZE = 32U;  // default gmBufferGuardSize
constexpr uint32_t GM_BUFFER_GUARD_MAX_SIZE = 1024U;
constexpr unsigned char GM_BUFFER_GUARD_DFT_PATTERN = 0x5A;

/* Capacities of the fixed-size char buffers carried in protocol structs. */
constexpr uint16_t PLUGIN_PATH_MAX = 256;
constexpr uint16_t SOC_VERSION_MAX = 256;
constexpr uint16_t KERNEL_NAME_MAX = 2048;
constexpr uint16_t DUMP_PATH_MAX = 256;

constexpr uint64_t CACHE_LINE_SIZE = 64ULL;

/* SIMT limits; SIMT_THREAD_MAX_PC_NUM sizes the arrays in SimtRecordBlockHeadImpl. */
constexpr uint16_t SIMT_THREAD_MAX_SIZE = 2048;
constexpr uint16_t SIMT_THREAD_MAX_PC_NUM = 64;

/* Cache split ratios (fractions; how they are applied is not visible in this header). */
constexpr float SIMT_CACHE_SIZE_RATIO = 0.1F;
constexpr float SHADOW_MEM_CACHE_SIZE_RATIO = 0.3F;
constexpr float SIMT_ENTRY_CACHE_SIZE_RATIO = 0.3F;
constexpr uint64_t SHADOW_MEM_MIN_BYTE_SIZE = 12ULL << 20;  // 12 MiB

/* All-ones sentinel; default value of ParaBaseRegister::addr. */
constexpr uint64_t ILLEGAL_ADDR = ~0ULL;
namespace OnlineShadowMemory {
/* Address masks: low 48 bits for global memory, low 32 bits for local memory. */
constexpr uint64_t ONLINE_GLOBAL_MEM_MASK = (1ULL << 48) - 1ULL;  // 0xFFFFFFFFFFFF
constexpr uint64_t ONLINE_LOCAL_MEM_MASK = (1ULL << 32) - 1ULL;   // 0xFFFFFFFF
/* 0x10000 (65536); presumably the number of bytes one shadow-memory unit covers — confirm. */
constexpr uint64_t ONLINE_ONE_SM_STAND_FOR_BYTE = 1ULL << 16;
/*
 * State recorded for a piece of memory (the "memory status" field of the
 * shadow-memory word). Enumerator values are spelled out because they are
 * stored in the protocol word and must not drift.
 */
enum MemoryByteStatus : uint8_t {
    DEFAULT = 0,
    READ = 1,
    GLOBAL_READ = 2,
    WRITE = 3,
    RACE = 4,
};
/* Memory kind encoded in the shadow-memory word; GM is the zero (default) value. */
enum class OnlineMemoryType : uint8_t {
    GM = 0,
    UB = 1,
};
/*
协议设计如下:
[63:32]: pc
[31:31]: sync threads state
[30:30]: memoryType 当前内存表示ub还是gm,默认为gm
[14:11]: memory status表示当前内存上的状态,分为DEFAULT/READ/GLOBAL_READ/WRITE/RACE ...
[10:0]: threadId
*/
/*
 * Field positions (lowest bit index) and widths (as right-aligned masks) of
 * the 64-bit shadow-memory word. A field is presumably read as
 * (word >> X_START_BIT) & X_MASK — the accessors are not in this header.
 */
/* pc: starts at bit 32. NOTE(review): the mask is 16 bits wide, while the
 * layout comment in this file lists pc as [63:32] (32 bits) — confirm which is intended. */
constexpr uint64_t PC_START_BIT = 32ULL;
constexpr uint64_t PC_MASK = (1ULL << 16) - 1ULL;            // 0xFFFF
/* sync threads state: single bit 31. */
constexpr uint64_t SYNC_STATE_START_BIT = 31ULL;
constexpr uint64_t SYNC_STATE_MASK = 1ULL;
/* memory type: single bit 30. */
constexpr uint64_t MEMORY_TYPE_START_BIT = 30ULL;
constexpr uint64_t MEMORY_TYPE_MASK = 1ULL;
/* memory status: 4 bits starting at bit 11, i.e. bits [14:11]. */
constexpr uint64_t MEMORY_STATUS_START_BIT = 11ULL;
constexpr uint64_t MEMORY_STATUS_MASK = (1ULL << 4) - 1ULL;  // 0xF
/* thread id: low 11 bits, i.e. bits [10:0]; no start-bit constant is defined for it. */
constexpr uint64_t THREAD_ID_MASK = (1ULL << 11) - 1ULL;     // 0x7FF
}
/*
 * Special 64-bit status values; presumably returned in place of a
 * shadow-memory address when none can be handed out — confirm with the allocator.
 */
enum class OnlineSmAddrStatus : uint64_t {
    LOCKED_BY_OTHER_THREADS = 1ULL,
    UNALLOCATABLE = ~0ULL,  // all ones, same value as UINT64_MAX
};
/* Size limits, written as shifts so the unit is visible. */
constexpr size_t MAX_ALL_PARAM_SIZE = 1ULL << 30;          // 1 GiB
constexpr size_t MAX_SINGLE_PARAM_SIZE = 4ULL << 10;       // 4 KiB (4096)
constexpr size_t MAX_MEMORY_RECORD_HEIGHT = 60ULL << 30;   // 60 GiB; needs a 64-bit size_t
/*
 * Type tag stored in PacketHead. Values are grouped in numeric ranges; every
 * enumerator is given explicitly so that inserting a new one cannot silently
 * renumber the ones after it.
 */
enum class PacketType : uint32_t {
    /* 0..: summaries, kernel binary, log text */
    DEVICE_SUMMARY = 0,
    KERNEL_SUMMARY = 1,
    KERNEL_BINARY = 2,
    LOG_STRING = 3,
    /* 1000..: records */
    MEMORY_RECORD = 1000,
    KERNEL_RECORD = 1001,
    IPC_RECORD = 1002,
    MEM_REGION_PERMISSION = 1003,
    SANITIZER_RECORD = 1004,
    GM_ADDR_OUT_OF_BOUND_RECORD = 1005,
    /* 2000: text */
    TEXT = 2000,
    /* 3000..: responses */
    IPC_RESPONSE = 3000,
    KERNEL_RECORD_RESPONSE = 3001,
    /* all bits set */
    INVALID = ~0U,
};
/* Mask mode stored in Register::maskMode; MASK_NORM is the zero value. */
enum class MaskMode : uint8_t {
    MASK_NORM = 0U,
    MASK_COUNT = 1U,
};
/*
 * Protocol header placed in front of a payload (<PacketHead> -- <PacketPayload>).
 * It carries only the type tag of the payload.
 */
struct PacketHead {
PacketType type;
};
/*
 * Device description payload; presumably the payload for
 * PacketType::DEVICE_SUMMARY (name match only — confirm).
 * Four 32-bit fields, no default values: callers must fill every field.
 */
struct DeviceSummary {
uint32_t device;
uint32_t blockSize;
uint32_t blockNum;
int32_t deviceId;  // signed, unlike the other fields
};
/*
 * Kernel description payload; presumably the payload for
 * PacketType::KERNEL_SUMMARY (name match only — confirm).
 * Only the two flags have defaults; the remaining fields must be set by the producer.
 */
struct KernelSummary {
    uint64_t pcStartAddr;
    uint32_t blockDim;
    KernelType kernelType;
    bool isKernelWithDBI{false};
    bool hasDebugLine{false};
    /* Fixed-capacity name buffer of KERNEL_NAME_MAX bytes. */
    char kernelName[KERNEL_NAME_MAX];
};
/*
 * 128-bit register value stored as two 64-bit words
 * (presumably l64 = low half, h64 = high half — confirm).
 */
struct VaRegister {
uint64_t l64;
uint64_t h64;
};
/*
 * Address/size pair for the parameter base.
 * A default-constructed object holds ILLEGAL_ADDR with size 0. `size` used to
 * be left uninitialised, so objects embedded without an initializer
 * (e.g. RecordBlockHeadImpl::paraBase) carried an indeterminate size.
 */
struct ParaBaseRegister {
    uint64_t addr = ILLEGAL_ADDR;
    uint64_t size = 0ULL;
};
/* Tag telling how RegisterPayload::regVal is to be interpreted. */
enum class RegisterValueType : uint64_t {
    VAL_UINT64 = 0,
    VAL_HALF = 1,
    VAL_FLOAT = 2,
    VAL_INT32 = 3,
};
/*
 * A typed register value: a type tag, the raw 64-bit value, and a signed index.
 * All three members are 8 bytes wide; none has a default value.
 */
struct RegisterPayload {
RegisterValueType regValType;
uint64_t regVal;
int64_t regIdx;
};
/*
 * Snapshot of per-core register state; RecordGlobalHeadImpl keeps an array of these.
 *
 * Layout is part of the protocol: a static_assert in this file requires
 * sizeof(Register) to be a multiple of 64 bytes, so members must not be
 * added, removed or reordered without re-checking that (rsv[] is the spare room).
 *
 * Only vectorMask0/1, cmpMaskAddr and ctrl have defaults; every other member
 * is left uninitialised by default-initialisation.
 */
struct Register {
    uint64_t fmatrix;
    uint64_t fmatrixB;
    uint64_t l3dRpt;
    uint64_t vectorMask0 = ~0ULL;
    uint64_t vectorMask1 = ~0ULL;
    uint64_t ndParaConfig;
    /* All ones. Written as ~0ULL (not ~0UL) so the value is 64 bits wide even
     * where unsigned long is 32-bit, matching the vector masks above. */
    uint64_t cmpMaskAddr = ~0ULL;
    MaskMode maskMode;

    /* Eight 128-bit VA registers. */
    VaRegister va0;
    VaRegister va1;
    VaRegister va2;
    VaRegister va3;
    VaRegister va4;
    VaRegister va5;
    VaRegister va6;
    VaRegister va7;

    /* spr* members (special-purpose registers, judging by the prefix — confirm). */
    uint64_t sprLoopSizeUb2Out;
    uint64_t sprLoopSizeOut2Ub;
    uint64_t sprLoop1StrideUb2Out;
    uint64_t sprLoop1StrideOut2Ub;
    uint64_t sprLoop2StrideUb2Out;
    uint64_t sprLoop2StrideOut2Ub;
    uint64_t sprPadCntNdDma;
    uint64_t sprLoop0StrideNdDma;
    uint64_t sprLoop1StrideNdDma;
    uint64_t sprLoop2StrideNdDma;
    uint64_t sprLoop3StrideNdDma;
    uint64_t sprLoop4StrideNdDma;
    uint64_t sprLoopSizeOut2L1;
    uint64_t sprLoop1StrideOut2L1;
    uint64_t sprLoop2StrideOut2L1;
    uint64_t sprMte2NzPara;
    uint64_t sprMTE2SrcPara;
    uint64_t sprLoop3Para;
    uint64_t sprChannelPara;
    uint64_t sprFmatrix;
    uint64_t sprFmatrixB;
    uint64_t sprFmatrixDual0;
    uint64_t sprFmatrixDual1;
    uint64_t sprL3dRpt;
    uint64_t sprL3dRptB;
    uint64_t sprPadding;
    uint64_t sprPaddingB;

    uint64_t ctrl = 0x08ULL;  // default has only bit 3 set
    uint64_t fftsBaseAddr;
    uint64_t fpc;
    uint64_t quantPre;
    uint64_t quantPost;
    RegisterPayload lreluAlpha;

    /* Reserved words; they also pad the struct to the required 64-byte multiple. */
    uint64_t rsv[5];
};
/* Kind of block stored in BlockInfo::blockType; AIVEC is the zero value. */
enum class BlockType : uint8_t {
    AIVEC = 0,
    AICUBE = 1,
};
/*
 * Kernel-level information stored in RecordGlobalHeadImpl.
 * Every member is value-initialised. l2CacheOffset used to be the only member
 * without an initializer and was therefore indeterminate after `KernelInfo k;`.
 */
struct KernelInfo {
    uint64_t totalBlockDim{};
    uint64_t totalCacheSize{};
    uint32_t kernelParamNum{};
    KernelType kernelType{};
    uint64_t l2CacheOffset{};
};
/*
 * Per-block bookkeeping stored in RecordBlockHeadImpl.
 * Members are ordered from widest to narrowest and all start at zero.
 */
struct BlockInfo {
    /* SIMT counters. */
    uint64_t simtSyncThreadCount = 0;
    uint64_t simtEndThreadCount = 0;
    uint64_t simtEndLastThread = 0;
    uint64_t simtEntryUseSize = 0;
    uint32_t simtEndCount = 0;
    uint32_t simtCallCount = 0;
    /* Block identity. */
    uint16_t blockId = 0;
    BlockType blockType{};  // value-initialised: the enumerator with value 0
    uint8_t vecSubBlockDim = 0;
};
struct CheckParmsInfo {
uint32_t cacheSize = DEFAULT_CACHE_SIZE;
int16_t checkBlockId = CHECK_ALL_BLOCK;
bool defaultcheck{};
bool memcheck{};
bool racecheck{};
bool initcheck{};
bool synccheck{};
bool registerCheck{};
uint32_t gmBufferGuardSize = GM_BUFFER_GUARD_DFT_SIZE;
};
/*
 * One memory range (address, size) with a permission word.
 */
struct HostMemoryInfo {
    uint64_t addr;
    uint64_t size;
    uint32_t permission;

    /*
     * Strict weak ordering by address, then by size.
     * `permission` does not take part, so two entries that differ only in
     * permission compare as equivalent.
     */
    bool operator<(const HostMemoryInfo& other) const
    {
        return (addr == other.addr) ? (size < other.size) : (addr < other.addr);
    }
};
/* SIMT information stored in RecordGlobalHeadImpl; a single size value, zero by default. */
struct SimtInfo {
    uint32_t ubDynamicSize = 0U;
};
/* An (offset, size) pair describing one region; both start at zero. */
struct OffsetInfo {
    uint64_t offset = 0ULL;
    uint64_t size = 0ULL;
};
/*
 * Offsets/sizes of three regions (SIMT error info, shadow memory, SIMT entry
 * info) plus the block head size. Stored in RecordGlobalHeadImpl; everything
 * is zero by default.
 */
struct ProtocolOffsetInfo {
    OffsetInfo simtErrorInfo;
    OffsetInfo shadowMemoryInfo;
    OffsetInfo simtEntryInfo;
    uint32_t blockHeadSize = 0U;
};
/*
 * Head of a SIMT record block.
 * The four counters/offsets start at zero; the two per-PC arrays
 * (SIMT_THREAD_MAX_PC_NUM entries each) have no initializer.
 */
struct SimtRecordBlockHeadImpl {
    uint64_t recordCount = 0;
    uint64_t recordWriteCount = 0;
    uint64_t offset = 0;
    uint64_t writeOffset = 0;
    uint64_t syncThreadPC[SIMT_THREAD_MAX_PC_NUM];
    uint32_t syncThreadNum[SIMT_THREAD_MAX_PC_NUM];
};

/* Aligned wrapper; the assertion pins the size to a multiple of 64 bytes. */
using SimtRecordBlockHead = StructAlignBy<SimtRecordBlockHeadImpl, 64UL>;
static_assert(sizeof(SimtRecordBlockHead) % 64UL == 0UL,
              "SimtRecordBlockHead size should aligned by 64 bytes");
constexpr int64_t C220_A2_A3_MAXCORE_NUM = 75;
struct RecordGlobalHeadImpl {
uint64_t securityVal = RECORD_HEAD_SECURITY_VALUE;
CheckParmsInfo checkParms{};
KernelInfo kernelInfo{};
ProtocolOffsetInfo offsetInfo{};
SimtInfo simtInfo{};
bool supportSimt{false};
Register registers[C220_A2_A3_MAXCORE_NUM];
};
using RecordGlobalHead = StructAlignBy<RecordGlobalHeadImpl, 64UL>;
static_assert(sizeof(Register) % 64UL == 0UL, "Register size should aligned by 64 bytes");
static_assert(sizeof(RecordGlobalHead) % 64UL == 0UL, "RecordGlobalHead size should aligned by 64 bytes");
/*
 * Head of one block's record area.
 * Counters, offsets, block info and flags are value-initialised;
 * paraBase relies on ParaBaseRegister's own member defaults.
 */
struct RecordBlockHeadImpl {
uint64_t recordCount{};
uint64_t recordWriteCount{};
uint64_t offset{};
uint64_t writeOffset{};
BlockInfo blockInfo{};
// Count that goes with hostMemoryInfoPtr (presumably the number of entries it points to — confirm).
uint32_t hostMemoryNum{};
// The pointer member is declared with the __gm__ qualifier only when this header is
// compiled with __CCE_IS_AICORE__ == 1; otherwise it is a plain pointer. Null by default.
#if defined(__CCE_IS_AICORE__) && __CCE_IS_AICORE__ == 1
__gm__ HostMemoryInfo *hostMemoryInfoPtr{nullptr};
#else
HostMemoryInfo *hostMemoryInfoPtr{nullptr};
#endif
uint32_t mstxFuseScopeDepth{};
bool extraWriteSuccess{false};
ParaBaseRegister paraBase;
};
// Aligned wrapper; the assertion pins the size to a multiple of 64 bytes.
using RecordBlockHead = StructAlignBy<RecordBlockHeadImpl, 64UL>;
static_assert(sizeof(RecordBlockHead) % 64UL == 0UL, "RecordBlockHead size should aligned by 64 bytes");
/*
* shadow memory动态分配空间管理
*/
/*
 * Bookkeeping head for dynamically allocated shadow-memory space:
 * start address, total size, a `current` position and a lock word, all zero
 * by default. (How `current` and `lock` are updated is not visible in this header.)
 */
struct ShadowMemoryHeapHeadImpl {
    uint64_t startAddr = 0U;
    uint64_t size = 0U;
    uint64_t current = 0U;
    uint64_t lock = 0U;
};

/* Aligned wrapper; the assertion pins the size to a multiple of 64 bytes. */
using ShadowMemoryHeapHead = StructAlignBy<ShadowMemoryHeapHeadImpl, 64UL>;
static_assert(sizeof(ShadowMemoryHeapHead) % 64UL == 0UL,
              "ShadowMemoryHeapHead size should aligned by 64 bytes");
/* Kind of memory operation; used as the type tag of HostMemRecord. */
enum class MemOpType : uint32_t {
    MALLOC = 0U,
    FREE = 1U,
    MEMCPY_BLOCKS = 2U,
    LOAD = 3U,
    STORE = 4U,
    INVALID = 5U,
};
/* Kind of access stored in ShadowMemoryRecord::accessType. */
enum class AccessType : uint8_t {
    READ = 0U,
    WRITE = 1U,
    MEMCPY_BLOCKS = 2U,
};
/*
 * Address space identifier. The underlying type is signed because
 * INVALID is -1; the valid spaces are numbered 0..8.
 */
enum class AddressSpace : int32_t {
    INVALID = -1,
    PRIVATE = 0,
    GM = 1,
    L1 = 2,
    L0A = 3,
    L0B = 4,
    L0C = 5,
    UB = 6,
    BT = 7,
    FB = 8,
};
/* Three-component (X, Y, Z) thread index. */
struct SimtThreadLocation {
    uint16_t idX;
    uint16_t idY;
    uint16_t idZ;

    /* Equal when all three components match. */
    bool operator==(const SimtThreadLocation &rhs) const
    {
        if (idX != rhs.idX) {
            return false;
        }
        if (idY != rhs.idY) {
            return false;
        }
        return idZ == rhs.idZ;
    }
};
/* Source position (file number, line number), program counter and block id. */
struct Location {
    uint64_t fileNo;
    uint64_t lineNo;
    uint64_t pc;
    uint16_t blockId;

    /* Equal when all four members match. */
    bool operator==(const Location &rhs) const
    {
        const bool sameSourcePos = (fileNo == rhs.fileNo) && (lineNo == rhs.lineNo);
        const bool sameExecPos = (pc == rhs.pc) && (blockId == rhs.blockId);
        return sameSourcePos && sameExecPos;
    }
};
/*
 * Head placed in front of a run of shadow-memory records.
 * `type` defaults to the tag value 80000. `recordCount` now defaults to 0;
 * it used to be indeterminate on a default-constructed head even though
 * `type` was already set.
 */
struct ShadowMemoryRecordHead {
    uint32_t type = 80000;
    uint64_t recordCount = 0;
};
/*
 * One shadow-memory record: the accessed range (addr, size), the code
 * location and thread index it is attributed to, the address space and the
 * kind of access. No member has a default value.
 */
struct ShadowMemoryRecord {
uint64_t addr;
uint64_t size;
Location location;
SimtThreadLocation threadLoc;
AddressSpace space;
AccessType accessType;
};
/* Where a piece of memory information came from; stored in HostMemRecord::infoSrc. */
enum class MemInfoSrc : uint8_t {
    BYPASS = 0,
    HAL = 1,
    RT = 2,
    ACL = 3,
    EXTRA = 4,
    MANUAL = 5,
    MSTX_HEAP = 6,
    MSTX_REGION = 7,
};
/* What a piece of memory is used for; stored in HostMemRecord::infoDesc. */
enum class MemInfoDesc : uint8_t {
    DEFAULT = 0,
    INPUT = 1,
    TILING = 2,
    DOUBLE_PTR = 3,
    HCCL_MC2_CONTEXT = 4,
    SECTION = 5,
    IPC_MEMORY = 6,
    OVERFLOW_ADDR = 7,
    PARA_BASE = 8,
};
/*
 * Record of one host-side memory operation: operation kind, where the
 * information came from and what the memory is used for, followed by five
 * 64-bit values. No member has a default value.
 * NOTE(review): which of srcAddr/dstAddr/memSize/paramsNo/rootAddr are
 * meaningful for each MemOpType is not visible in this header — confirm with the producers.
 */
struct HostMemRecord {
MemOpType type;
MemInfoSrc infoSrc;
MemInfoDesc infoDesc;
uint64_t srcAddr;
uint64_t dstAddr;
uint64_t memSize;
uint64_t paramsNo;
uint64_t rootAddr;
};
/*
 * Two (address, size) pairs: user* and out*. Presumably the payload for
 * PacketType::GM_ADDR_OUT_OF_BOUND_RECORD (name match only — confirm).
 * The field order alternates 64-bit and 32-bit members; it is part of the
 * record layout, so do not reorder the members to save padding.
 */
struct GMAddrOutOfBoundRecord {
uint64_t userAddr;
uint32_t userSize;
uint64_t outAddr;
uint32_t outSize;
};
/* IPC memory operation kind; type tag of IPCMemRecord and IPCResponse. */
enum class IPCOperationType : uint32_t {
    SET_INFO = 0,
    DESTROY_INFO = 1,
    MAP_INFO = 2,
    UNMAP_INFO = 3,
};
/*
 * IPCMemRecord payload member `setInfo`: an address range plus a name held in
 * a fixed 64-byte buffer.
 */
struct IPCMemorySetInfo {
uint64_t addr;
uint64_t size;
char name[64];
};
/*
 * IPCMemRecord payload member `destroyInfo`: only a name, held in a fixed
 * 64-byte buffer.
 */
struct IPCMemoryDestroyInfo {
char name[64];
};
/*
 * IPCMemRecord payload member `mapInfo`: an address plus a name held in a
 * fixed 64-byte buffer.
 */
struct IPCMemoryMapInfo {
uint64_t addr;
char name[64];
};
/*
* 在 `rtIpcCloseMemory` 被调用后发送
*/
/* IPCMemRecord payload member `unmapInfo`: only an address. */
struct IPCMemoryUnmapInfo {
uint64_t addr;
};
/*
 * One IPC memory record: an operation tag followed by an anonymous union of
 * the four per-operation payloads, so the record is as large as its largest payload.
 * Presumably `type` selects the active member (SET_INFO -> setInfo,
 * DESTROY_INFO -> destroyInfo, MAP_INFO -> mapInfo, UNMAP_INFO -> unmapInfo);
 * nothing in this header enforces that — confirm with producers/consumers.
 */
struct IPCMemRecord {
IPCOperationType type;
union {
IPCMemorySetInfo setInfo;
IPCMemoryDestroyInfo destroyInfo;
IPCMemoryMapInfo mapInfo;
IPCMemoryUnmapInfo unmapInfo;
};
};
/* How symbol names are presented; FULL_DEMANGLED_NAME is the zero value. */
enum class DemangleMode : uint8_t {
    FULL_DEMANGLED_NAME = 0,
    SIMPLE_DEMANGLED_NAME = 1,
    MANGLED_NAME = 2,
};
struct SanitizerConfig {
bool defaultCheck;
bool memCheck;
bool raceCheck;
bool initCheck;
bool syncCheck;
bool registerCheck;
bool checkDeviceHeap;
bool checkCannHeap;
bool leakCheck;
bool checkUnusedMemory;
bool checkCrossNpuRaces;
bool isPrintFullStack{false};
int16_t checkBlockId = CHECK_ALL_BLOCK;
uint32_t cacheSize = DEFAULT_CACHE_SIZE;
DemangleMode demangleMode{DemangleMode::FULL_DEMANGLED_NAME};
char pluginPath[PLUGIN_PATH_MAX];
char kernelName[KERNEL_NAME_MAX];
char dumpPath[DUMP_PATH_MAX];
uint32_t gmBufferGuardSize = GM_BUFFER_GUARD_DFT_SIZE;
};
/* Result code carried by the response structs. */
enum class ResponseStatus : uint32_t {
    SUCCESS = 0,
    FAIL = 1000,
};
/*
 * Response to an IPC operation: the operation kind and its result.
 * Presumably the payload for PacketType::IPC_RESPONSE (name match only — confirm).
 */
struct IPCResponse {
IPCOperationType type;
ResponseStatus status;
};
/*
 * Response for a kernel record: a block index and a result.
 * Presumably the payload for PacketType::KERNEL_RECORD_RESPONSE (name match only — confirm).
 */
struct KernelRecordResponse {
uint32_t blockIdx;
ResponseStatus status;
};
/*
 * Permission description for one memory region: the range (addr, size),
 * the device it belongs to and a flags word.
 * Presumably the payload for PacketType::MEM_REGION_PERMISSION (name match only — confirm).
 * NOTE(review): the meaning of the individual `flags` bits is not defined in this header.
 */
struct MemRegionPermissionDesc {
uint64_t addr;
uint64_t size;
uint32_t deviceId;
uint32_t flags;
};