/* -------------------------------------------------------------------------
 * This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* ------------------------------------------------------------------------- */
#ifndef PLUGIN_ONLINE_CHECK_H
#define PLUGIN_ONLINE_CHECK_H
#include "core/framework/record_defs.h"
#include "kernel_pub_func.h"
#include "record_type_map.h"
#include "parse_record.h"
#include "shadow_memory_online.h"
namespace Sanitizer {
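/**
 * The head region records the host-side malloc information.
 * When an instrumentation stub records an instruction, the malloc information in the head
 * is parsed and compared against the access;
 * if a memory error is found, the error information is recorded, otherwise the call simply returns.
 *
 * Usage:
 * @code
 * OnlineCheck check;
 * check.Init(memInfo, memInfoSimt, memInfoSimd, blockIdx);
 * check.Process<RecordType::SIMT_LDG>(record);
 * @endcode
 */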
class OnlineCheck {
public:
AICORE_FUNC_HEAD __attribute__((always_inline)) OnlineCheck() : memInfo_{nullptr}, memInfoSimt_{nullptr}, memInfoSimd_{nullptr},
#if defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)
globalHead_{nullptr}, simtBlockHead_{nullptr}, simdBlockHead_{nullptr}, sortedLen_{}, blockIdx_{},
shadowMemory_()
#else
globalHead_{nullptr}, simtBlockHead_{nullptr}, simdBlockHead_{nullptr}, sortedLen_{}, blockIdx_{}
#endif
{}
* @param memInfoSimt 当前block对应的simt指针
* @param memInfoSimd 当前block对应的simd指针
* @param blockIdx 当前block数
* @brief 初始化内存检测类
*/
AICORE_FUNC_HEAD void Init(__gm__ uint8_t *memInfo, __gm__ uint8_t *memInfoSimt, __gm__ uint8_t *memInfoSimd,
uint64_t blockIdx);
* @tparam Record 记录结构体类型
* @param record 指令记录信息
* @brief 传入栈上的记录,判断栈上的记录存在内存错误行为;
*/
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void Process(Record const &record);
* @brief 处理para base addr地址,将kernel入参地址写入到blockHead对应位置
*/
AICORE_FUNC_HEAD void ProcessParaBaseAddr();
#if defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)
AICORE_FUNC_HEAD void ClearSyncThreadState() {
shadowMemory_.ClearSyncThreadState();
}
#endif
#if defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)
AICORE_FUNC_HEAD void CopyShadowMemoryToMemInfo() {
shadowMemory_.CopyShadowMemoryToMemInfo();
}
#endif
private:
* @param size 待检查的地址长度
* @param thresholdAddr 地址阈值
* @param thresholdSize 长度阈值
* @brief 计算待检查地址和阈值范围的交集长度,返回值表示交集长度
*/
AICORE_FUNC_HEAD uint64_t CalIntersectionSize(uint64_t addr, uint64_t size, uint64_t thresholdAddr,
uint64_t thresholdSize) const;
* @tparam Record 记录结构体类型
* @param addrInfo simt指令的信息
* @param record 指令记录信息
* @brief 计算内存操作行为的错误信息,支持多种错误类型的同时记录
*/
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void Do(AddrInfo const &addrInfo, Record const &record);
* @param illegalSize 错误长度
* @brief 检测当前gm指令内存行为是否有非法读写行为,如果有则返回非法读写的长度,返回值表示是否有非法行为
*/
AICORE_FUNC_HEAD bool GmReadWriteCheck(AddrInfo const &addrInfo, uint64_t &illegalSize) const;
* @param illegalSize 错误长度
* @brief 检测当前ub指令内存行为是否有非法读写行为,如果有则返回非法读写的长度,返回值表示是否有非法行为
*/
AICORE_FUNC_HEAD bool UbReadWriteCheck(AddrInfo const &addrInfo, uint64_t &illegalSize) const;
* @brief 检测当前内存行为是否有非对齐读写行为,如果有则返回true,否则返回false
*/
AICORE_FUNC_HEAD bool AlignCheck(AddrInfo const &addrInfo) const;
* @tparam Record 记录结构体类型
* @param errorRecord 错误信息记录
* @param errorDesc 错误信息具体描述
* @param record 指令记录信息
* @param cacheWriteOffset 缓存的记录写入偏移
* @brief 将当前记录的错误信息dump到gm上保存
* dump协议如下:
* ONLINE_ERROR | KernelErrorRecord | Record | KernelErrorDesc_1 | KernelErrorDesc_2 | .....
*/
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void DumpErrorInfo(KernelErrorRecord &errorRecord, KernelErrorDesc const &errorDesc,
Record const &record, uint64_t cacheWriteOffset);
* @brief 将kernel入参地址写入到blockHead对应位置
*/
AICORE_FUNC_HEAD bool WriteParaBaseAddr();
* @brief 对simdHead处的内存地址进行插入排序,默认升序
*/
AICORE_FUNC_HEAD void InsertionSortMemory();
* @brief 将simdHead处的内存地址合并为不连续的内存序列,便于后续求越界长度
*/
AICORE_FUNC_HEAD void MergeMemory();
private:
__gm__ uint8_t *memInfo_;
__gm__ uint8_t *memInfoSimt_;
__gm__ uint8_t *memInfoSimd_;
__gm__ RecordGlobalHead *globalHead_;
__gm__ SimtRecordBlockHead *simtBlockHead_;
__gm__ RecordBlockHead *simdBlockHead_;
uint32_t sortedLen_;
int16_t blockIdx_;
#if (defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)) || defined(__BUILD_TESTS__)
AICORE_FUNC_HEAD void ShadowMemoryCheck(AddrInfo const &addrInfo, ShadowMemoryOnline::AuxInfo &auxInfo);
ShadowMemoryOnline shadowMemory_;
AICORE_FUNC_HEAD bool UpdateSyncThreadPcNum(uint64_t pc);
AICORE_FUNC_HEAD bool SortSyncThreadPcNumInPlace(
__gm__ SimtRecordBlockHead *simtBlockHead0, uint16_t &validPcNum, uint32_t *tmpCounts);
AICORE_FUNC_HEAD void GetMaxSyncThreadPcNum(uint16_t &validPcNum, uint32_t *tmpCounts);
#endif
};
/**
 * @brief Bind the checker to the record buffers and publish the block id.
 *
 * Stores the three raw pointers together with their typed head views, remembers the block
 * index, and writes GetBlockIdx() into the SIMD block head. On SIMT builds (or test builds)
 * it additionally initializes the shadow memory and stores the thread dimensions.
 *
 * NOTE(review): none of the pointers is checked for null here although the heads are
 * dereferenced — presumably callers guarantee valid buffers; confirm.
 *
 * @param memInfo     buffer viewed as RecordGlobalHead
 * @param memInfoSimt buffer viewed as SimtRecordBlockHead
 * @param memInfoSimd buffer viewed as RecordBlockHead
 * @param blockIdx    index of the current block
 */
AICORE_FUNC_HEAD void OnlineCheck::Init(__gm__ uint8_t *memInfo, __gm__ uint8_t *memInfoSimt,
                                        __gm__ uint8_t *memInfoSimd, uint64_t blockIdx)
{
    // Raw buffers paired with their typed views.
    memInfo_ = memInfo;
    globalHead_ = reinterpret_cast<__gm__ RecordGlobalHead *>(memInfo);

    memInfoSimt_ = memInfoSimt;
    simtBlockHead_ = reinterpret_cast<__gm__ SimtRecordBlockHead *>(memInfoSimt);

    memInfoSimd_ = memInfoSimd;
    simdBlockHead_ = reinterpret_cast<__gm__ RecordBlockHead *>(memInfoSimd);

    blockIdx_ = blockIdx;

#if (defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)) || defined(__BUILD_TESTS__)
    // The shadow memory region is located relative to the SIMD buffer.
    shadowMemory_.Init(
        reinterpret_cast<uint64_t>(memInfoSimd + globalHead_->offsetInfo.shadowMemoryInfo.offset),
        globalHead_->offsetInfo.shadowMemoryInfo.size, memInfo, memInfoSimt, memInfoSimd);

    // Store the launch thread dimensions in the block head.
    uint16_t dimX{};
    uint16_t dimY{};
    uint16_t dimZ{};
    GetThreadDim(dimX, dimY, dimZ);
    simdBlockHead_->blockInfo.threadXDim = dimX;
    simdBlockHead_->blockInfo.threadYDim = dimY;
    simdBlockHead_->blockInfo.threadZDim = dimZ;
#endif

    simdBlockHead_->blockInfo.blockId = GetBlockIdx();
}
/**
 * @brief Parse one instruction record and run the online checks on it.
 *
 * Does nothing when the checker has no record buffer (memInfo_ is null).
 *
 * @tparam recordType kind of the record, forwarded to ParseRecord and Do
 * @tparam Record     record structure type
 * @param record      the instruction record to check
 */
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void OnlineCheck::Process(Record const &record)
{
    if (memInfo_ != nullptr) {
        AddrInfo parsed = ParseRecord<recordType>(record);
        Do<recordType>(parsed, record);
    }
}
/**
 * @brief Append the kernel argument addresses to the host memory table, then sort and merge it.
 *
 * Returns early when there is no record buffer or when WriteParaBaseAddr() declines to write.
 * On success the table is sorted, merged, extraWriteSuccess is set and the SIMD buffer is flushed.
 */
AICORE_FUNC_HEAD void OnlineCheck::ProcessParaBaseAddr()
{
    // Short-circuit keeps WriteParaBaseAddr() from running without a record buffer.
    const bool written = (memInfo_ != nullptr) && WriteParaBaseAddr();
    if (!written) {
        return;
    }

    InsertionSortMemory();
    MergeMemory();

    simdBlockHead_->extraWriteSuccess = true;
    Flush(memInfoSimd_);
}
/**
 * @brief Length of the overlap between [addr, addr + size) and
 *        [thresholdAddr, thresholdAddr + thresholdSize).
 *
 * The overlap is computed as min(end) - max(start). The previous branch chain tested
 * `addr + size > thresholdAddr + thresholdSize` twice, so an access that started before the
 * threshold range and ended inside it fell through to `return size` and was over-counted.
 *
 * Range ends are computed with plain unsigned addition, as before; a range whose end wraps
 * around is not treated specially.
 *
 * @param addr          start of the checked range
 * @param size          length of the checked range
 * @param thresholdAddr start of the threshold range
 * @param thresholdSize length of the threshold range
 * @return number of bytes the two ranges share, 0 when they do not overlap
 */
AICORE_FUNC_HEAD uint64_t OnlineCheck::CalIntersectionSize(uint64_t addr, uint64_t size, uint64_t thresholdAddr,
                                                           uint64_t thresholdSize) const
{
    const uint64_t end = addr + size;
    const uint64_t thresholdEnd = thresholdAddr + thresholdSize;

    // Disjoint ranges or an empty threshold range share nothing.
    if (thresholdSize == 0U || end <= thresholdAddr || addr >= thresholdEnd) {
        return 0U;
    }

    const uint64_t overlapStart = addr > thresholdAddr ? addr : thresholdAddr;
    const uint64_t overlapEnd = end < thresholdEnd ? end : thresholdEnd;
    return overlapEnd - overlapStart;
}
/**
 * @brief Run every enabled online check on one parsed access and dump each finding.
 *
 * One KernelErrorRecord / KernelErrorDesc pair is reused for all findings of this record;
 * DumpErrorInfo is called once per finding with the same cached write offset.
 * Checks performed, in order:
 *  - illegal GM / UB read or write (when DoMemCheck is enabled),
 *  - misaligned access (when DoMemCheck is enabled),
 *  - on SIMT builds: shadow-memory findings (overlap, race, uninitialized read, write loss)
 *    and thread-synchronization bookkeeping (when the respective checks are enabled).
 *
 * @tparam recordType kind of the record being checked
 * @tparam Record     record structure type
 * @param addrInfo    parsed access information for the record
 * @param record      the original instruction record, dumped alongside any finding
 */
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void OnlineCheck::Do(AddrInfo const &addrInfo, Record const &record)
{
    // Snapshot of the write offset: all findings of this record are dumped relative to it.
    uint64_t cacheWriteOffset = simtBlockHead_->writeOffset;
    KernelErrorRecord errorRecord{};
    errorRecord.recordType = recordType;
    errorRecord.recordSize = sizeof(Record);
    KernelErrorDesc errorDesc{};
    errorDesc.location = addrInfo.location;
    errorDesc.threadLoc = addrInfo.threadLoc;
    errorDesc.space = addrInfo.space;
    uint64_t illegalSize = 0U;

    // --- Illegal read/write in GM or UB ---------------------------------------------------
    if (DoMemCheck(memInfo_) && (GmReadWriteCheck(addrInfo, illegalSize) ||
        UbReadWriteCheck(addrInfo, illegalSize))) {
        auto &illegalDesc = errorDesc.payload.illegalDesc;
        illegalDesc.addr = addrInfo.addr;
        illegalDesc.illegalSize = illegalSize;
        if (addrInfo.opType == AccessType::MEMCPY_BLOCKS) {
            // MEMCPY_BLOCKS is reported both as an illegal read and as an illegal write.
            errorDesc.errorType = KernelErrorType::ILLEGAL_ADDR_READ;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
            errorDesc.errorType = KernelErrorType::ILLEGAL_ADDR_WRITE;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        } else {
            errorDesc.errorType = addrInfo.opType == AccessType::READ ?
                KernelErrorType::ILLEGAL_ADDR_READ : KernelErrorType::ILLEGAL_ADDR_WRITE;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        }
    }

    // --- Misaligned access ----------------------------------------------------------------
    if (DoMemCheck(memInfo_) && AlignCheck(addrInfo)) {
        auto &misAlignDesc = errorDesc.payload.misAlignDesc;
        misAlignDesc.addr = addrInfo.addr;
        misAlignDesc.misAlignSize = addrInfo.alignSize;
        errorDesc.errorType = KernelErrorType::MISALIGNED_ACCESS;
        DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
    }

#if defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)
    // --- Shadow-memory based checks -------------------------------------------------------
    if (DoMemCheck(memInfo_) || DoRaceCheck(memInfo_) || DoInitCheck(memInfo_)) {
        ShadowMemoryOnline::AuxInfo auxInfo{};
        ShadowMemoryCheck(addrInfo, auxInfo);
        errorDesc.l1StartAddr = auxInfo.l1StartAddr;
        errorDesc.l2StartAddr = auxInfo.l2StartAddr;
        errorDesc.l2MemStatusAddr = auxInfo.l2MemStatusAddr;

        auto &overLapError = auxInfo.errorInfo[ShadowMemoryOnline::overLapErrorIdx];
        if (overLapError.errorType == KernelErrorType::THREAD_OVERLAP) {
            errorDesc.errorType = overLapError.errorType;
            auto &overLapDesc = errorDesc.payload.overLapDesc;
            overLapDesc.addr = addrInfo.addr;
            overLapDesc.overLapSize = overLapError.nBadBytes;
            overLapDesc.conflictedThreadLoc = overLapError.conflictedThreadLoc;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        }

        auto &raceError = auxInfo.errorInfo[ShadowMemoryOnline::raceErrorIdx];
        if (raceError.errorType == KernelErrorType::THREAD_RW_RACE ||
            raceError.errorType == KernelErrorType::THREAD_WR_RACE ||
            raceError.errorType == KernelErrorType::THREAD_WW_RACE) {
            errorDesc.errorType = raceError.errorType;
            auto &raceDesc = errorDesc.payload.raceDesc;
            raceDesc.addr = addrInfo.addr;
            raceDesc.conflictedThreadLoc = raceError.conflictedThreadLoc;
            raceDesc.conflictedLocation.pc = raceError.pc;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        }

        auto &initError = auxInfo.errorInfo[ShadowMemoryOnline::initErrorIdx];
        if (initError.errorType == KernelErrorType::UNINITIALIZED_READ) {
            errorDesc.errorType = initError.errorType;
            auto &unitializedDesc = errorDesc.payload.unitializedDesc;
            unitializedDesc.addr = addrInfo.addr;
            unitializedDesc.errorSize = initError.nBadBytes;
            unitializedDesc.threadLoc = initError.conflictedThreadLoc;
            unitializedDesc.pc = initError.pc;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        }

        auto &writeLoss = auxInfo.errorInfo[ShadowMemoryOnline::writeLossIdx];
        if (writeLoss.errorType == KernelErrorType::WRITE_LOSS) {
            errorDesc.errorType = writeLoss.errorType;
            auto &writeLossDesc = errorDesc.payload.writeLossDesc;
            writeLossDesc.addr = addrInfo.addr;
            writeLossDesc.memSize = writeLoss.nBadBytes;
            writeLossDesc.pc = writeLoss.pc;
            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
        }
    }

    // --- Thread synchronization checks ----------------------------------------------------
    if (DoSyncCheck(memInfo_)) {
        if (recordType == RecordType::THREAD_BLOCK_BARRIER) {
            // Count this barrier for the current thread; failure means the PC table is full.
            if (!UpdateSyncThreadPcNum(addrInfo.location.pc)) {
                errorDesc.errorType = KernelErrorType::SYNC_THREADS_RECORD_LOSS;
                DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
            }
        }
        if (recordType == RecordType::SIMT_END) {
            auto &blockInfo = simdBlockHead_->blockInfo;
            uint64_t ret = AtomicAdd(&blockInfo.simtEndLastThread, 1);
            // Presumably AtomicAdd returns the pre-increment value, so only the last thread
            // of the block to arrive here performs the cross-thread comparison — confirm.
            if (ret == (blockInfo.threadXDim * blockInfo.threadYDim * blockInfo.threadZDim - 1)) {
                uint16_t validPcNum{0};
                uint32_t tmpCounts[SIMT_THREAD_MAX_PC_NUM] = {0};
                uint64_t threadOffset0 = globalHead_->offsetInfo.simtErrorInfo.offset;
                __gm__ uint8_t *simtBlock0 = memInfoSimd_ + threadOffset0;
                __gm__ SimtRecordBlockHead *simtBlockHead0 = reinterpret_cast<__gm__ SimtRecordBlockHead *>(simtBlock0);
                if (!SortSyncThreadPcNumInPlace(simtBlockHead0, validPcNum, tmpCounts)) {
                    errorDesc.errorType = KernelErrorType::SYNC_THREADS_RECORD_LOSS;
                    DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
                }
                // After this call tmpCounts[pc] holds the largest barrier count seen for each PC.
                GetMaxSyncThreadPcNum(validPcNum, tmpCounts);
                errorDesc.errorType = KernelErrorType::THREADS_ASYNC_IN_BLOCK;
                auto &syncDesc = errorDesc.payload.syncDesc;
                // Value-initialize: only pc is assigned below, yet the whole struct is copied
                // into the dumped descriptor.
                Location loc{};
                SimtThreadLocation threadLoc{};
                for (size_t pcIdx = 0; pcIdx < validPcNum; ++pcIdx) {
                    for (size_t threadIdx = 0; threadIdx < blockInfo.simtEndLastThread; ++threadIdx) {
                        uint64_t threadOffset = globalHead_->offsetInfo.simtErrorInfo.offset +
                            threadIdx * (globalHead_->offsetInfo.simtErrorInfo.size + sizeof(SimtRecordBlockHead));
                        __gm__ uint8_t *simtBlock = memInfoSimd_ + threadOffset;
                        __gm__ SimtRecordBlockHead *simtBlockHead = reinterpret_cast<__gm__ SimtRecordBlockHead *>(simtBlock);
                        // A thread that hit this barrier fewer times than the maximum is reported.
                        if (simtBlockHead->syncThreadNum[pcIdx] < tmpCounts[pcIdx]) {
                            loc.pc = simtBlockHead0->syncThreadPC[pcIdx];
                            syncDesc.syncLocation = loc;
                            DecomposeThreadId(threadIdx, threadLoc.idX, threadLoc.idY, threadLoc.idZ);
                            syncDesc.syncThreadLoc = threadLoc;
                            DumpErrorInfo<recordType>(errorRecord, errorDesc, record, cacheWriteOffset);
                        }
                    }
                }
            }
        }
    }
#endif
}
#if defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510) && defined(SIMT_MODE)
/**
 * @brief Replay one access on the shadow memory and collect its findings in auxInfo.
 *
 * Only GM and UB accesses are considered. Nothing happens when the shadow memory is not
 * ready, when there is no record buffer, or when the shadow memory reports the range as invalid.
 * READ performs a load, MEMCPY_BLOCKS a load followed by a store, anything else a store.
 *
 * @param addrInfo parsed access information
 * @param auxInfo  receives whatever LoadNBytes / StoreNBytes report
 */
AICORE_FUNC_HEAD void OnlineCheck::ShadowMemoryCheck(AddrInfo const &addrInfo, ShadowMemoryOnline::AuxInfo &auxInfo) {
    const bool trackedSpace = addrInfo.space == AddressSpace::GM || addrInfo.space == AddressSpace::UB;
    if (!trackedSpace) {
        return;
    }
    if (!shadowMemory_.IsReady() || memInfo_ == nullptr) {
        return;
    }
    if (shadowMemory_.InvalidRange(addrInfo)) {
        return;
    }

    const bool isRead = addrInfo.opType == AccessType::READ;
    const bool isCopy = addrInfo.opType == AccessType::MEMCPY_BLOCKS;

    // Loads come first so that a block copy is seen as read-then-write.
    if (isRead || isCopy) {
        shadowMemory_.LoadNBytes(addrInfo, auxInfo);
    }
    if (!isRead) {
        shadowMemory_.StoreNBytes(addrInfo, auxInfo);
    }
}
/**
 * @brief Count one barrier hit at the given PC in the current thread's block head.
 *
 * Scans the PC table from the front: the first slot that is either empty (0) or already
 * holds this PC is used; an empty slot is claimed for the PC first.
 *
 * @param pc program counter of the barrier instruction
 * @return true when the hit was counted, false when no slot was available
 */
AICORE_FUNC_HEAD bool OnlineCheck::UpdateSyncThreadPcNum(uint64_t pc)
{
    for (size_t slot = 0; slot < SIMT_THREAD_MAX_PC_NUM; ++slot) {
        const bool slotIsFree = simtBlockHead_->syncThreadPC[slot] == 0;
        if (!slotIsFree && simtBlockHead_->syncThreadPC[slot] != pc) {
            continue;  // occupied by a different PC
        }
        if (slotIsFree) {
            simtBlockHead_->syncThreadPC[slot] = pc;
        }
        ++simtBlockHead_->syncThreadNum[slot];
        return true;
    }
    return false;
}
/**
 * @brief Re-order every thread's barrier counters so that they follow thread 0's PC table.
 *
 * First counts the PCs already present in thread 0's table (added to validPcNum). Then, for
 * each further thread (index 1 .. simtEndLastThread - 1), every recorded PC is looked up in
 * thread 0's table; unknown PCs are appended there while room remains. The thread's counters
 * are then rewritten in thread 0's PC order.
 *
 * The PC is now kept in the table's own element type instead of being narrowed to uint32_t:
 * UpdateSyncThreadPcNum stores 64-bit PCs into the same table, so a narrowed copy could fail
 * to match or be appended truncated.
 *
 * @param simtBlockHead0 block head of thread 0; its PC table is extended in place
 * @param validPcNum     in/out: incremented by the number of PCs in thread 0's table
 *                       (the visible caller passes 0)
 * @param tmpCounts      scratch buffer with at least SIMT_THREAD_MAX_PC_NUM entries; expected to
 *                       be zero on entry, and the used entries are reset to zero after each thread
 * @return false when some PC could not be appended because thread 0's table was full
 */
AICORE_FUNC_HEAD bool OnlineCheck::SortSyncThreadPcNumInPlace(
    __gm__ SimtRecordBlockHead *simtBlockHead0, uint16_t &validPcNum, uint32_t *tmpCounts) {
    bool isSortedAll{true};

    // Entries of thread 0's PC table are valid up to the first zero.
    for (size_t numIdx = 0; numIdx < SIMT_THREAD_MAX_PC_NUM; ++numIdx) {
        if (simtBlockHead0->syncThreadPC[numIdx] == 0) {
            break;
        }
        ++validPcNum;
    }
    if (simdBlockHead_->blockInfo.simtEndLastThread <= 1) {
        return true;
    }

    for (size_t threadIdx = 1; threadIdx < simdBlockHead_->blockInfo.simtEndLastThread; ++threadIdx) {
        uint64_t threadOffset = globalHead_->offsetInfo.simtErrorInfo.offset +
            threadIdx * (globalHead_->offsetInfo.simtErrorInfo.size + sizeof(SimtRecordBlockHead));
        __gm__ uint8_t *simtBlock = memInfoSimd_ + threadOffset;
        __gm__ SimtRecordBlockHead *simtBlockHead = reinterpret_cast<__gm__ SimtRecordBlockHead *>(simtBlock);

        for (size_t numIdx = 0; numIdx < SIMT_THREAD_MAX_PC_NUM; ++numIdx) {
            // Same type as the table element, so no narrowing happens.
            auto pc = simtBlockHead->syncThreadPC[numIdx];
            if (pc == 0) {
                break;
            }
            uint32_t count = simtBlockHead->syncThreadNum[numIdx];

            bool found = false;
            for (uint16_t j = 0; j < validPcNum; ++j) {
                if (simtBlockHead0->syncThreadPC[j] == pc) {
                    tmpCounts[j] = count;
                    found = true;
                    break;
                }
            }
            if (!found) {
                if (validPcNum < SIMT_THREAD_MAX_PC_NUM) {
                    // New PC: append it to thread 0's table.
                    simtBlockHead0->syncThreadPC[validPcNum] = pc;
                    tmpCounts[validPcNum] = count;
                    ++validPcNum;
                } else {
                    // Table full: stop scanning this thread's PCs and report the loss.
                    isSortedAll = false;
                    break;
                }
            }
        }

        // Write the counters back in thread 0's PC order and clear the scratch buffer.
        for (size_t each = 0; each < validPcNum; ++each) {
            simtBlockHead->syncThreadNum[each] = tmpCounts[each];
            tmpCounts[each] = 0;
        }
    }
    return isSortedAll;
}
/**
 * @brief For each of the first validPcNum PC slots, find the largest barrier count over all
 *        threads (index 0 .. simtEndLastThread - 1) and store it in tmpCounts.
 *
 * @param validPcNum number of PC slots to evaluate
 * @param tmpCounts  output: tmpCounts[i] receives the maximum count for slot i
 */
AICORE_FUNC_HEAD void OnlineCheck::GetMaxSyncThreadPcNum(uint16_t &validPcNum, uint32_t *tmpCounts) {
    for (size_t pcIdx = 0; pcIdx < validPcNum; ++pcIdx) {
        uint32_t peak = 0;
        for (size_t threadIdx = 0; threadIdx < simdBlockHead_->blockInfo.simtEndLastThread; ++threadIdx) {
            // Per-thread areas are laid out back to back: one head plus one error buffer each.
            __gm__ uint8_t *threadArea = memInfoSimd_ + (globalHead_->offsetInfo.simtErrorInfo.offset +
                threadIdx * (globalHead_->offsetInfo.simtErrorInfo.size + sizeof(SimtRecordBlockHead)));
            __gm__ SimtRecordBlockHead *threadHead = reinterpret_cast<__gm__ SimtRecordBlockHead *>(threadArea);
            const auto seen = threadHead->syncThreadNum[pcIdx];
            if (seen > peak) {
                peak = seen;
            }
        }
        tmpCounts[pcIdx] = peak;
    }
}
#endif
/**
 * @brief Tell whether a permission mask allows the given kind of access.
 *
 * READ needs the read flag, WRITE needs the write flag, every other access type needs both.
 *
 * @param accessType kind of access being performed
 * @param permission bit mask of MSTX_MEM_PERMISSIONS_REGION_FLAGS_* values
 * @return true when the access is permitted
 */
AICORE_FUNC_HEAD bool HasPermission(AccessType accessType, uint32_t permission) {
    const bool canRead = (permission & MSTX_MEM_PERMISSIONS_REGION_FLAGS_READ) != 0;
    const bool canWrite = (permission & MSTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE) != 0;

    if (accessType == AccessType::READ) {
        return canRead;
    }
    if (accessType == AccessType::WRITE) {
        return canWrite;
    }
    return canRead && canWrite;
}
/**
 * @brief Check a GM access against the host memory table of the current block.
 *
 * Sums the overlap of the access with every table entry whose permission allows the access
 * type; whatever part of the access is not covered is added to illegalSize.
 *
 * The uncovered length is clamped at zero: if table entries overlap one another, the summed
 * overlap can exceed the access size, and the unsigned subtraction would otherwise wrap to
 * a huge bogus illegal size.
 *
 * @param addrInfo    parsed access; accesses outside the GM address space are ignored
 * @param illegalSize in/out: incremented by the number of uncovered bytes
 * @return true when illegalSize is non-zero afterwards; false for non-GM accesses
 */
AICORE_FUNC_HEAD bool OnlineCheck::GmReadWriteCheck(AddrInfo const &addrInfo, uint64_t &illegalSize) const
{
    if (addrInfo.space != AddressSpace::GM) {
        return false;
    }

    const uint64_t addr = addrInfo.addr;
    const uint64_t size = addrInfo.size;

    uint64_t coveredSize{};
    for (size_t memIdx = 0; memIdx < simdBlockHead_->hostMemoryNum; ++memIdx) {
        __gm__ HostMemoryInfo const &mallocInfo = simdBlockHead_->hostMemoryInfoPtr[memIdx];
        // Regions that do not permit this kind of access do not count as coverage.
        if (!HasPermission(addrInfo.opType, mallocInfo.permission)) {
            continue;
        }
        coveredSize += CalIntersectionSize(addr, size, mallocInfo.addr, mallocInfo.size);
    }

    if (coveredSize < size) {
        illegalSize += size - coveredSize;
    }
    return illegalSize > 0U;
}
/**
 * @brief Check a UB access against the dynamic UB size stored in the global head.
 *
 * An access starting at or beyond the limit is illegal for its full length; an access that
 * starts inside but runs past the limit is illegal for the part beyond it. illegalSize is
 * only overwritten in those two cases.
 *
 * @param addrInfo    parsed access; accesses outside the UB address space are ignored
 * @param illegalSize in/out: set to the illegal length when the access crosses the limit
 * @return true when illegalSize is non-zero afterwards; false for non-UB accesses
 */
AICORE_FUNC_HEAD bool OnlineCheck::UbReadWriteCheck(AddrInfo const &addrInfo, uint64_t &illegalSize) const
{
    if (addrInfo.space != AddressSpace::UB) {
        return false;
    }

    const uint32_t ubLimit = globalHead_->simtInfo.ubDynamicSize;
    const uint64_t begin = addrInfo.addr;
    const uint64_t length = addrInfo.size;

    if (begin >= ubLimit) {
        illegalSize = length;
    } else if (begin + length > ubLimit) {
        illegalSize = begin + length - ubLimit;
    }
    return illegalSize > 0U;
}
/**
 * @brief Append one error description for the current record to the SIMT error buffer.
 *
 * Layout written at (memInfoSimt_ + sizeof(SimtRecordBlockHead) + cacheWriteOffset):
 *   ONLINE_ERROR | KernelErrorRecord | Record | KernelErrorDesc_1 | KernelErrorDesc_2 | ...
 * The first error of a record consumes the tag, the error record, the record and one
 * descriptor; every further error consumes one more descriptor.
 *
 * The payload is copied only when there is room and recordCount equals recordWriteCount;
 * otherwise only recordCount is increased. `offset` always advances by the step size, and the
 * SIMT buffer is flushed at the end.
 *
 * @tparam recordType kind of the record (not used inside this function)
 * @tparam Record     record structure type
 * @param errorRecord header shared by all errors of this record; errorNum is incremented here
 * @param errorDesc   description of this particular error
 * @param record      the instruction record that triggered the error
 * @param cacheWriteOffset write offset captured before the first error of this record
 */
template<RecordType recordType, typename Record>
AICORE_FUNC_HEAD void OnlineCheck::DumpErrorInfo(KernelErrorRecord &errorRecord, KernelErrorDesc const &errorDesc,
                                                 Record const &record, uint64_t cacheWriteOffset)
{
    constexpr uint32_t FIRST_ERROR_NUM = 1;
    errorRecord.errorNum++;
    const bool isFirstError = errorRecord.errorNum == FIRST_ERROR_NUM;

    // The tag is written unconditionally, before the capacity check.
    __gm__ uint8_t *entryBase = memInfoSimt_ + sizeof(SimtRecordBlockHead) + cacheWriteOffset;
    __gm__ RecordType *gmTag = reinterpret_cast<__gm__ RecordType *>(entryBase);
    *gmTag = RecordType::ONLINE_ERROR;

    // Remaining pieces follow the tag back to back.
    __gm__ KernelErrorRecord *gmErrorRecord = reinterpret_cast<__gm__ KernelErrorRecord *>(gmTag + 1);
    __gm__ Record *gmRecord = reinterpret_cast<__gm__ Record *>(gmErrorRecord + 1);
    __gm__ uint8_t *descArea = reinterpret_cast<__gm__ uint8_t *>(gmRecord + 1);
    __gm__ KernelErrorDesc *gmErrorDesc = reinterpret_cast<__gm__ KernelErrorDesc *>(
        descArea + sizeof(KernelErrorDesc) * (errorRecord.errorNum - 1));

    uint64_t stepSize = sizeof(KernelErrorDesc);
    if (isFirstError) {
        stepSize += sizeof(RecordType) + sizeof(KernelErrorRecord) + sizeof(Record);
    }

    const bool hasRoom =
        simtBlockHead_->writeOffset + CACHE_LINE_SIZE + stepSize < globalHead_->offsetInfo.simtErrorInfo.size;
    if (hasRoom && simtBlockHead_->recordCount == simtBlockHead_->recordWriteCount) {
        CopyRecordToGm(gmErrorRecord, &errorRecord);
        CopyRecordToGm(gmRecord, &record);
        CopyRecordToGm(gmErrorDesc, &errorDesc);
        simtBlockHead_->writeOffset += stepSize;
        if (isFirstError) {
            simtBlockHead_->recordWriteCount++;
            simtBlockHead_->recordCount++;
        }
    } else {
        simtBlockHead_->recordCount++;
    }

    simtBlockHead_->offset += stepSize;
    Flush(memInfoSimt_);
}
/**
 * @brief Tell whether a register index lies in
 *        [0, C220_A2_OR_A3_EVEN_DEVICE_VEC_PHYS_CORE_END_IDS].
 *
 * @param regIdx index to validate
 * @return true when the index is inside the closed range
 */
AICORE_FUNC_HEAD bool CheckRegIdxValid(int64_t regIdx)
{
    const bool outOfRange = (regIdx < 0) || (regIdx > C220_A2_OR_A3_EVEN_DEVICE_VEC_PHYS_CORE_END_IDS);
    return !outOfRange;
}
/**
 * @brief Read the core id and map it to a register index.
 *
 * On AI-core builds the core id is read from the hardware (SIMT or non-SIMT intrinsic on the
 * supported architectures). Ids in [0, C220_A2_OR_A3_EVEN_DEVICE_VEC_PHYS_CORE_END_IDS] are
 * returned unchanged; ids in the odd-device range are rebased by subtracting
 * C220_A3_ODD_DEVICE_VEC_CUBE_CORE_START_IDS. Any other id is returned as is. On other builds
 * the result is 0.
 *
 * @return the (possibly rebased) core id
 */
AICORE_FUNC_HEAD int64_t GetRegisterIdx()
{
    int64_t coreId{};
#if defined(__CCE_IS_AICORE__) && __CCE_IS_AICORE__ == 1
#if defined(__DAV_C220__) || defined(__DAV_C220_VEC__) || defined(__DAV_C220_CUBE__) || \
    (defined(__NPU_ARCH__) && (__NPU_ARCH__ == 3101 || __NPU_ARCH__ == 3510))
#ifdef SIMT_MODE
    coreId = __cce_simt_get_COREID();
#else
    coreId = get_coreid();
#endif
#endif
    const bool inEvenRange = coreId >= 0 && coreId <= C220_A2_OR_A3_EVEN_DEVICE_VEC_PHYS_CORE_END_IDS;
    const bool inOddRange = coreId >= C220_A3_ODD_DEVICE_VEC_CUBE_CORE_START_IDS &&
                            coreId <= C220_A3_ODD_DEVICE_VEC_PHYS_CORE_END_IDS;
    // The even range takes precedence; only ids exclusive to the odd range are rebased.
    if (!inEvenRange && inOddRange) {
        coreId -= C220_A3_ODD_DEVICE_VEC_CUBE_CORE_START_IDS;
    }
#endif
    return coreId;
}
/**
 * @brief Copy the kernel argument addresses into the free tail of the host memory table.
 *
 * Nothing is written when this block is not selected for checking, when the addresses were
 * already written (extraWriteSuccess), or when the register index is invalid. Otherwise the
 * number of leading non-zero entries is stored in sortedLen_, and the argument addresses read
 * from paraBase.addr are written to the following entries until either the arguments or the
 * table run out. Only the `addr` field of those entries is written.
 *
 * @return true when the table was updated, false when the write was skipped
 */
AICORE_FUNC_HEAD bool OnlineCheck::WriteParaBaseAddr()
{
    const bool blockSelected = globalHead_->checkParms.checkBlockId == CHECK_ALL_BLOCK ||
                               globalHead_->checkParms.checkBlockId == blockIdx_;
    if (!blockSelected) {
        return false;
    }
    if (simdBlockHead_->extraWriteSuccess) {
        return false;
    }
    if (!CheckRegIdxValid(GetRegisterIdx())) {
        return false;
    }

    uint64_t *paramAddrs = reinterpret_cast<uint64_t *>(simdBlockHead_->paraBase.addr);

    // Count the entries already in use: they end at the first zero address.
    uint32_t usedSlots = 0;
    while (usedSlots < simdBlockHead_->hostMemoryNum && simdBlockHead_->hostMemoryInfoPtr[usedSlots].addr != 0x0) {
        usedSlots++;
    }
    sortedLen_ = usedSlots;

    for (uint32_t i = 0;
         i < globalHead_->kernelInfo.kernelParamNum && usedSlots + i < simdBlockHead_->hostMemoryNum; ++i) {
        simdBlockHead_->hostMemoryInfoPtr[usedSlots + i].addr = paramAddrs[i];
    }
    return true;
}
/**
 * @brief Insertion-sort the host memory table by ascending address.
 *
 * Sorting starts at index sortedLen_. Entries whose address is zero are never used as a key,
 * and zero-address entries met while shifting are moved towards the end.
 *
 * NOTE(review): only `addr` and `size` travel with an entry; the `permission` field read by
 * GmReadWriteCheck stays at its old index — confirm this is intended.
 */
AICORE_FUNC_HEAD void OnlineCheck::InsertionSortMemory()
{
    auto &entries = simdBlockHead_->hostMemoryInfoPtr;
    for (uint32_t i = sortedLen_; i < simdBlockHead_->hostMemoryNum; ++i) {
        const auto keyAddr = entries[i].addr;
        const auto keySize = entries[i].size;
        if (i == 0 || keyAddr == 0x0) {
            continue;
        }

        // `slot` is where the key will land; shift larger (or empty) predecessors right.
        uint32_t slot = i;
        for (; slot > 0; --slot) {
            const auto prevAddr = entries[slot - 1].addr;
            if (prevAddr != 0x0 && !(prevAddr > keyAddr)) {
                break;
            }
            entries[slot].addr = prevAddr;
            entries[slot].size = entries[slot - 1].size;
        }
        entries[slot].addr = keyAddr;
        entries[slot].size = keySize;
    }
}
/**
 * @brief Collapse overlapping or touching entries of the host memory table.
 *
 * Walks the table once, keeping a "tail" entry: an entry that starts at or before the tail's
 * end is absorbed into it, any other entry becomes the new tail. Entries after the final tail
 * get address and size zero. The result has no overlapping ranges provided the table was
 * sorted by address beforehand.
 *
 * NOTE(review): the `permission` field of merged entries is not looked at here — confirm.
 */
AICORE_FUNC_HEAD void OnlineCheck::MergeMemory()
{
    auto &entries = simdBlockHead_->hostMemoryInfoPtr;
    if (simdBlockHead_->hostMemoryNum <= 1) {
        return;
    }

    uint32_t tail{};
    for (uint32_t i = 1; i < simdBlockHead_->hostMemoryNum; ++i) {
        const uint64_t nextAddr = entries[i].addr;
        const uint64_t nextSize = entries[i].size;
        const uint64_t tailAddr = entries[tail].addr;
        const uint64_t tailEnd = tailAddr + entries[tail].size;

        if (nextAddr > tailEnd) {
            // Gap between the ranges: start a new tail entry.
            ++tail;
            entries[tail].addr = nextAddr;
            entries[tail].size = nextSize;
            continue;
        }

        // Overlapping or adjacent: grow the tail to the farther end.
        const uint64_t nextEnd = nextAddr + nextSize;
        const uint64_t mergedEnd = nextEnd > tailEnd ? nextEnd : tailEnd;
        entries[tail].size = mergedEnd - tailAddr;
    }

    // Clear everything behind the last merged entry.
    for (uint32_t i = tail + 1; i < simdBlockHead_->hostMemoryNum; ++i) {
        entries[i].addr = 0x0;
        entries[i].size = 0;
    }
}
/**
 * @brief Tell whether the access address violates its alignment requirement.
 *
 * An alignSize of zero is treated as "no alignment requirement" and never reports a
 * misalignment; this avoids a modulo-by-zero, which is undefined behavior.
 *
 * @param addrInfo parsed access; uses addr and alignSize
 * @return true when addr is not a multiple of alignSize
 */
AICORE_FUNC_HEAD bool OnlineCheck::AlignCheck(const AddrInfo &addrInfo) const
{
    if (addrInfo.alignSize == 0) {
        return false;
    }
    return addrInfo.addr % addrInfo.alignSize != 0;
}
}
#endif