* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file sk_lock_detector.h
* \brief
*/
#ifndef __SK_LOCK_DETECTOR_H__
#define __SK_LOCK_DETECTOR_H__
#include <vector>
#include <algorithm>
#include <unordered_map>
#include <set>
#include "sk_types.h"
#include "sk_node.h"
#include "sk_graph.h"
#include "sk_scope_info.h"
#include "acl/acl.h"
class SuperKernelOptionsManager;
* \enum DeadlockFailReason
* \brief Detailed failure reasons for deadlock detection
*/
enum class DeadlockFailReason : uint8_t {
NOT_FIND_DEADLOCK = 0,
KERNEL_INSUFFICIENT_CORES,
NOTIFY_NOT_IN_GRAPH,
NOTIFY_AFTER_SK_RANGE,
NOTIFY_INVALID,
NOTIFY_INSUFFICIENT_CORES,
FIRST_WAIT,
NO_SUPPORT_NODE,
};
* \brief Convert DeadlockFailReason to string
*/
inline const char* DeadlockFailReasonToStr(DeadlockFailReason reason)
{
switch (reason) {
case DeadlockFailReason::NOT_FIND_DEADLOCK:
return "not find deadlock";
case DeadlockFailReason::KERNEL_INSUFFICIENT_CORES:
return "The wait node depends on a kernel node that requires more cores than available";
case DeadlockFailReason::NOTIFY_INSUFFICIENT_CORES:
return "The wait node depends on a notify node which has fused in other SK that it requires more cores";
case DeadlockFailReason::NOTIFY_NOT_IN_GRAPH:
return "The wait node depends on a notify node that it is not in graph";
case DeadlockFailReason::NOTIFY_AFTER_SK_RANGE:
return "The wait node depends on a notify node that it is after SK range";
case DeadlockFailReason::NOTIFY_INVALID:
return "In check deadlock of this wait node, get invalid notify node";
case DeadlockFailReason::FIRST_WAIT:
return "The wait node is first node in scope";
case DeadlockFailReason::NO_SUPPORT_NODE:
return "The wait node waiting for unsupport node type";
default:
return "UNKNOWN REASON";
}
}
* @class LockDetector
* @brief 死锁检测器,用于检测SuperKernel融合时是否会引入死锁
*
* 该类通过分析节点的核心使用情况和依赖关系,判断是否可以将节点融合到SuperKernel中。
*/
class LockDetector {
public:
* @brief 构造函数,传入图对象
* @param graph 图对象引用
*/
explicit LockDetector(SuperKernelGraph& graph)
{
Init(graph);
}
LockDetector(SuperKernelGraph& graph, const SuperKernelOptionsManager& opts) : opts_(&opts)
{
Init(graph);
}
~LockDetector()
{
if (graph_ != nullptr) {
Reset();
}
}
static aclError GetDeviceCores();
* @brief 重置检测器状态
*/
void Reset();
* @brief 获取最近一次检测到的死锁原因
*/
DeadlockFailReason GetDeadlockReason() const { return deadlockReason_; }
* @brief 判断节点是否可融合到SuperKernel中
* @param curNode 待检测的节点
* @return true 可融合,false 不可融合(会导致死锁)
*/
bool IsFusible(SuperKernelBaseNode& curNode);
* @brief 设置scope中Notify节点的expand number
* @param scope Scope信息
*
* 该函数遍历scope中的所有节点,找出最大的vec/cube num,
* 并将其设置到所有Notify节点上。
*/
void SetNotifyNodesExpandNumForScope(SuperKernelScopeInfo& scope);
* @brief 重置scope中Notify节点的expand number为0
* @param scope Scope信息
*
* 该函数将scope中所有Notify节点的expandVecNum和expandCubeNum重置为0,
* 用于在死锁检测Pass结束时复原节点状态,使得Pass可重入。
*/
void ResetNotifyExpandNumForScope(SuperKernelScopeInfo& scope);
private:
void Init(SuperKernelGraph& graph);
std::pair<uint64_t, uint64_t> GetAvailableCores(bool isSuperKernel) const;
bool IsInSKStream(const SuperKernelBaseNode& node);
void UpdateNodeInfo(const SuperKernelBaseNode& node);
void UpdateSKRangeInStream(const SuperKernelBaseNode& node);
bool IsBeforeSKRange(const SuperKernelBaseNode& node);
bool IsAfterSKRange(const SuperKernelBaseNode& node);
bool HasIntersection(const std::unordered_set<uint32_t>& lhsStreams, const std::unordered_set<uint32_t>& rhsStreams);
bool HasDeadlock(SuperKernelBaseNode* curNode);
bool CheckKernelNodeDeadlock(SuperKernelBaseNode* preNode);
bool CheckWaitNodeDeadlock(SuperKernelBaseNode* preNode);
bool CheckNotifyNodeDeadlock(SuperKernelBaseNode* preNode);
bool GetWaitNodeFusibleStatus(SuperKernelBaseNode& curNode);
bool CheckNotifyInSKStream(SuperKernelBaseNode& curNode, SuperKernelBaseNode& notifyNode);
bool GetFusibleStatus(SuperKernelBaseNode& curNode);
bool ShouldBypassValueWaitDeadlock(const SuperKernelBaseNode& curNode) const;
bool HasEnoughCores(const SuperKernelBaseNode* curNode, bool isSuperKernel);
void RollbackVisitedState(std::vector<uint64_t>& visitedNodes);
std::vector<uint64_t> nodes;
std::vector<uint64_t> tempVisitedNodes;
uint32_t depOpCubeNum;
uint32_t depOpVecNum;
uint32_t superKernelCubeNum;
uint32_t superKernelVecNum;
static int64_t deviceRealCubeNum;
static int64_t deviceRealVecNum;
std::unordered_set<uint32_t> skStreamIds;
uint32_t nodeNum;
uint32_t kernelNodeNum;
std::unordered_map<uint32_t, std::pair<uint64_t, uint64_t>> skRangeInStream;
SuperKernelGraph* graph_;
const SuperKernelOptionsManager* opts_ = nullptr;
DeadlockFailReason deadlockReason_ = DeadlockFailReason::NOT_FIND_DEADLOCK;
};
#endif