* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file sk_scope_split.cpp
* \brief Implementation of multi-pass scope splitting
*/
#include "sk_scope_split.h"
#include "sk_common.h"
#include <algorithm>
#include <unordered_map>
EventOnlyStreamRemovePass::EventOnlyStreamRemovePass(SuperKernelGraph& inputGraph)
: ScopeSplitPass(inputGraph), markedCount_(0) {}
bool EventOnlyStreamRemovePass::IsEventNode(SuperKernelBaseNode* node) const {
if (node == nullptr) {
return false;
}
SkNodeType nodeType = node->GetNodeType();
return (nodeType == SkNodeType::NODE_NOTIFY ||
nodeType == SkNodeType::NODE_WAIT ||
nodeType == SkNodeType::NODE_RESET ||
nodeType == SkNodeType::NODE_MEMORY_WRITE ||
nodeType == SkNodeType::NODE_MEMORY_WAIT);
}
void EventOnlyStreamRemovePass::CollectNodesPerStream(
const SuperKernelScopeInfo& scope,
std::unordered_map<uint32_t, std::vector<SuperKernelBaseNode*>>& streamNodes) {
streamNodes.clear();
for (auto* node : scope.GetNodes()) {
if (node == nullptr) {
continue;
}
uint32_t streamIdx = node->GetStreamIdxInGraph();
streamNodes[streamIdx].push_back(node);
}
}
bool EventOnlyStreamRemovePass::IsStreamAllEventNodes(
const std::vector<SuperKernelBaseNode*>& nodes) const {
if (nodes.empty()) {
return false;
}
for (auto* node : nodes) {
if (node == nullptr) {
continue;
}
if (!IsEventNode(node)) {
return false;
}
}
return true;
}
uint32_t EventOnlyStreamRemovePass::ProcessScope(SuperKernelScopeInfo& scope) {
if (scope.GetNodes().empty()) {
return 0;
}
std::unordered_map<uint32_t, std::vector<SuperKernelBaseNode*>> streamNodes;
CollectNodesPerStream(scope, streamNodes);
uint32_t scopeMarkedCount = 0;
for (const auto& pair : streamNodes) {
uint32_t streamIdx = pair.first;
const auto& nodes = pair.second;
if (IsStreamAllEventNodes(nodes)) {
for (auto* node : nodes) {
if (node != nullptr && node->IsFusible()) {
node->SetIsFusible(false);
node->SetFusionFailReason(FusionFailReason::ISOLATED_EVENT);
scopeMarkedCount++;
SK_LOGI("[EventOnlyStreamRemove] Marked event node %lu in stream %u as non-fusible",
node->GetNodeId(), streamIdx);
}
}
SK_LOGI("[EventOnlyStreamRemove] Stream %u in scope has all event nodes (%zu nodes), marked as non-fusible",
streamIdx, nodes.size());
}
}
return scopeMarkedCount;
}
bool EventOnlyStreamRemovePass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[EventOnlyStreamRemove] %s pass starting execution", GetName().c_str());
markedCount_ = 0;
SK_LOGI("[EventOnlyStreamRemove] processing %zu scopes", scopes.size());
for (size_t i = 0; i < scopes.size(); ++i) {
uint32_t scopeMarked = ProcessScope(scopes[i]);
markedCount_ += scopeMarked;
SK_LOGI("[EventOnlyStreamRemove] Scope %zu: after processing, %zu nodes, %zu streams, marked %u nodes",
i, scopes[i].GetNodes().size(), scopes[i].GetScopeStreamInfos().size(), scopeMarked);
}
if (markedCount_ > 0) {
SK_LOGI("[EventOnlyStreamRemove] Marked %u event-only stream nodes as non-fusible, requesting re-split",
markedCount_);
scopes.clear();
RequestResplit();
}
SK_LOGI("[EventOnlyStreamRemove] %s pass completed, marked %u event-only stream nodes total",
GetName().c_str(), markedCount_);
return true;
}
PerOpMaxCoreSplitPass::PerOpMaxCoreSplitPass(SuperKernelGraph& inputGraph)
: ScopeSplitPass(inputGraph) {}
bool PerOpMaxCoreSplitPass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[PerOpMaxCore] %s pass starting", GetName().c_str());
scopes.clear();
const auto& headNodes = graph_.GetHeadNodes();
for (uint64_t headNodeId : headNodes) {
uint64_t nodeId = headNodeId;
while (nodeId != INVALID_TASK_ID) {
SuperKernelBaseNode* node = graph_.GetNodeById(nodeId);
if (node == nullptr) break;
if (node->GetNodeType() == SkNodeType::NODE_KERNEL && node->IsFusible()) {
SuperKernelScopeInfo scope;
scope.AddNode(node);
scope.MutableBreakInfo().SetReason(ScopeBreakReason::DEBUG_PER_OP_MAX_CORE);
RebuildStreamInfos(scope);
scopes.push_back(std::move(scope));
SK_LOGI("[PerOpMaxCore] Created scope for kernel %lu (scopeIdx=%zu)",
node->GetNodeId(), scopes.size() - 1);
}
nodeId = node->GetNextNodeId();
}
}
SK_LOGI("[PerOpMaxCore] Completed, %zu scopes", scopes.size());
return true;
}
void ScopeSplitPass::PrintScopeDetails(const std::vector<SuperKernelScopeInfo>& scopes,
const SuperKernelGraph& graph,
const char* passName) {
SK_LOGI("========== Scope split results begin: pass=%s, totalScopes=%zu ==========",
passName, scopes.size());
for (size_t i = 0; i < scopes.size(); ++i) {
const auto& scope = scopes[i];
std::string scopeNames = GetScopeNamesFromBitFlags(scope.GetScopeBitFlags(), graph);
SK_LOGI("Scope %zu (scopeId=%u): %zu nodes, %zu streams, scopeBitFlags=%s, scopeNames=[%s]",
i, scope.GetScopeId(), scope.GetNodes().size(), scope.GetScopeStreamInfos().size(),
graph.BitsetToString(scope.GetScopeBitFlags()).c_str(),
scopeNames.c_str());
SK_LOGI(" BreakInfo: %s", scope.GetBreakInfo().Format().c_str());
PrintScopeNodes(i, scope);
PrintScopeStreamInfos(i, scope);
}
SK_LOGI("========== Scope split results end: pass=%s ==========", passName);
}
void ScopeSplitPass::PrintScopeResults(const std::vector<SuperKernelScopeInfo>& scopes,
const SuperKernelGraph& graph,
const char* passName) {
{
SK_LOG_CONTEXT_SIMPLE("sk_scope_split.log");
PrintScopeDetails(scopes, graph, passName);
}
PrintScopeDetails(scopes, graph, passName);
}
std::string ScopeSplitPass::GetScopeNamesFromBitFlags(const std::bitset<MAX_SCOPE_NUM>& scopeBitFlags,
const SuperKernelGraph& graph) {
std::string scopeNames;
for (size_t bit = 0; bit < MAX_SCOPE_NUM && bit < scopeBitFlags.size(); ++bit) {
if (scopeBitFlags.test(bit)) {
std::string name;
if (graph.GetScopeNameByIdx(static_cast<uint32_t>(bit), name)) {
if (!scopeNames.empty()) scopeNames += "_";
scopeNames += name;
}
}
}
return scopeNames.empty() ? "none" : scopeNames;
}
void ScopeSplitPass::PrintScopeNodes(size_t scopeIdx, const SuperKernelScopeInfo& scope) {
const auto& nodes = scope.GetNodes();
SK_LOGI(" Scope %zu nodes: %zu", scopeIdx, nodes.size());
for (size_t i = 0; i < nodes.size(); ++i) {
SK_LOGI(" [%zu] %s", i, nodes[i]->Format().c_str());
}
}
void ScopeSplitPass::PrintScopeStreamInfos(size_t scopeIdx, const SuperKernelScopeInfo& scope) {
const auto& scopeStreamInfos = scope.GetScopeStreamInfos();
for (size_t j = 0; j < scopeStreamInfos.size(); ++j) {
const auto& streamInfo = scopeStreamInfos[j];
SK_LOGI(" Scope %zu StreamInfo[%zu]: streamIdx=%u, headNode=%lu, tailNode=%lu, nodeSize=%lu",
scopeIdx, j, streamInfo.streamIdx, streamInfo.headNodeIdx,
streamInfo.tailNodeIdx, streamInfo.nodeSize);
}
}
void ScopeSplitPass::RequestResplit() {
if (splitter_ != nullptr) {
splitter_->RequestResplit();
}
}
void ScopeSplitPass::RebuildStreamInfos(SuperKernelScopeInfo& scope) {
std::vector<ScopeStreamInfo> newStreamInfos;
for (const auto* node : scope.GetNodes()) {
uint32_t streamIdx = node->GetStreamIdxInGraph();
auto it = std::find_if(newStreamInfos.begin(), newStreamInfos.end(),
[streamIdx](const ScopeStreamInfo& info) {
return info.streamIdx == streamIdx;
});
if (it == newStreamInfos.end()) {
ScopeStreamInfo newInfo;
newInfo.streamIdx = streamIdx;
newInfo.headNodeIdx = node->GetNodeId();
newInfo.tailNodeIdx = node->GetNodeId();
newInfo.nodeSize = 1;
newStreamInfos.push_back(std::move(newInfo));
} else {
it->tailNodeIdx = node->GetNodeId();
it->nodeSize++;
}
}
scope.SetScopeStreamInfos(std::move(newStreamInfos));
}
std::vector<uint64_t> ScopeSplitPass::GetKernelNodeIds(const SuperKernelScopeInfo& scope) {
std::vector<uint64_t> kernelIds;
for (const auto* node : scope.GetNodes()) {
if (node != nullptr && node->GetNodeType() == SkNodeType::NODE_KERNEL) {
if (node->IsScopeNode()) {
continue;
}
kernelIds.push_back(node->GetNodeId());
}
}
std::sort(kernelIds.begin(), kernelIds.end());
return kernelIds;
}
bool ScopeSplitPass::HasSameKernelNodes(const SuperKernelScopeInfo& originScope, const SuperKernelScopeInfo& currentScope) {
std::vector<uint64_t> originKernels = GetKernelNodeIds(originScope);
std::vector<uint64_t> currentKernels = GetKernelNodeIds(currentScope);
std::sort(originKernels.begin(), originKernels.end());
std::sort(currentKernels.begin(), currentKernels.end());
return originKernels == currentKernels;
}
InitialScopeSplitPass::InitialScopeSplitPass(SuperKernelGraph& inputGraph, SkHeapType heapType)
: ScopeSplitPass(inputGraph), nodeHeap_(inputGraph, heapType) {}
void InitialScopeSplitPass::InitStreamStates() {
SK_LOGI("[SplitScope] initializing stream states for %s", GetName().c_str());
const auto& streams = graph_.GetStreams();
const auto& headNodes = graph_.GetHeadNodes();
streamStates_.clear();
for (size_t i = 0; i < streams.size(); ++i) {
streamStates_[i] = StreamState();
streamStates_[i].currentNodeId = headNodes[i];
SK_LOGI(" Stream %u: initialized with head node %lu",
static_cast<uint32_t>(i), headNodes[i]);
}
SK_LOGI("[SplitScope] completed initializing %zu streams", streamStates_.size());
}
bool InitialScopeSplitPass::ResetStreamStates() {
SK_LOGI("[SplitScope] resetting all stream states for next scope");
streamBreakInfos_.clear();
currentScopeBreakInfo_ = ScopeBreakInfo();
scopeStartBreakInfo_ = ScopeBreakInfo();
for (auto& pair : streamStates_) {
uint32_t streamIdx = pair.first;
pair.second.isTerminated = false;
pair.second.isSuspended = false;
pair.second.waitingForNotify = INVALID_TASK_ID;
SK_LOGI(" Stream %u: reset, currentNodeId=%lu",
streamIdx, pair.second.currentNodeId);
}
SK_LOGI("[SplitScope] starting to skip unfusible nodes");
if (!SkipUnfusibleNodes()) {
SK_LOGE("[SplitScope] Failed to skip unfusible nodes during stream reset");
return false;
}
SK_LOGI("[SplitScope] completed reset, active streams will proceed to next scope");
return true;
}
* \brief Skip all unfusible nodes for all streams
*
* This function is called at the beginning of each scope to advance each stream's
* current position past all unfusible nodes, so that we start processing from a
* fusible node for the next scope.
*
* Special handling for WAIT nodes:
* - If the corresponding NOTIFY node has already been visited, skip the WAIT node
* - If the NOTIFY node has not been visited, suspend the stream and keep the WAIT
* node as the current node (do not advance)
*
* Special handling for NOTIFY nodes:
* - Always add NOTIFY nodes to visitedNotifies_ even if they are unfusible, so that
* dependent WAIT nodes can be properly processed
*/
bool InitialScopeSplitPass::SkipUnfusibleNodes() {
SK_LOGI("[SkipUnfusible] starting to skip unfusible nodes for all streams");
for (auto& pair : streamStates_) {
uint32_t streamIdx = pair.first;
if (!SkipUnfusibleNodesForStream(streamIdx)) {
SK_LOGE("[SkipUnfusible] Failed to skip unfusible nodes for stream %u", streamIdx);
return false;
}
}
SK_LOGI("[SkipUnfusible] completed skipping unfusible nodes");
return true;
}
* \brief Skip unfusible nodes for a single stream
*
* \param streamIdx The stream index to process
*
* \return true if the stream has more nodes to process (either fusible nodes or
* it's suspended on an unfusible WAIT node), false if the stream is terminated
*/
bool InitialScopeSplitPass::SkipUnfusibleNodesForStream(uint32_t streamIdx) {
StreamState& state = streamStates_[streamIdx];
uint32_t skipCount = 0;
if (state.currentNodeId == INVALID_TASK_ID) {
SK_LOGI("Stream %u: Already at end (currentNodeId=INVALID), nothing to skip", streamIdx);
return true;
}
while (state.currentNodeId != INVALID_TASK_ID) {
SuperKernelBaseNode* node = graph_.GetNodeById(state.currentNodeId);
if (node == nullptr) {
SK_LOGE("Stream %u: node %lu not found (graph integrity error)", streamIdx, state.currentNodeId);
return false;
}
if (node->IsFusible()) {
SK_LOGI("Stream %u: Found fusible node %s, stop skipping", streamIdx, node->Format().c_str());
break;
}
bool shouldContinue = ProcessUnfusibleNodeForSkip(streamIdx, node);
if (!shouldContinue) {
SkNodeType nodeType = node->GetNodeType();
if (nodeType == SkNodeType::NODE_WAIT) {
return true;
}
return false;
}
state.currentNodeId = node->GetNextNodeId();
skipCount++;
}
if (skipCount > 0) {
SK_LOGI("Stream %u: Skipped %u unfusible nodes, next node=%lu", streamIdx, skipCount, state.currentNodeId);
}
return true;
}
* \brief Process an unfusible node during skip phase
* \param streamIdx The stream index
* \param node The unfusible node to process
* \return true if should continue skipping, false to stop
*/
bool InitialScopeSplitPass::ProcessUnfusibleNodeForSkip(uint32_t streamIdx, SuperKernelBaseNode* node) {
SkNodeType nodeType = node->GetNodeType();
if (nodeType == SkNodeType::NODE_WAIT) {
bool shouldSkip = ProcessUnfusibleWaitNode(streamIdx, node);
if (!shouldSkip) {
SK_LOGI("Stream %u: Suspended on unfusible wait node %s", streamIdx, node->Format().c_str());
return false;
}
SK_LOGI("Stream %u: Skipping unfusible wait node %s", streamIdx, node->Format().c_str());
} else if (nodeType == SkNodeType::NODE_NOTIFY) {
if (!HandleUnfusibleNotifyNode(node, streamIdx)) {
SK_LOGE("Stream %u: Failed to handle unfusible notify node %s, abort processing",
streamIdx, node->Format().c_str());
return false;
}
} else {
processedNodes_.insert(node->GetNodeId());
SK_LOGI("Stream %u: Skipping unfusible node %s (type=%d), added to processedNodes",
streamIdx, node->Format().c_str(), static_cast<int>(nodeType));
}
return true;
}
* \brief Process an unfusible WAIT node and determine if it should be skipped
*
* \param streamIdx The stream index
* \param waitNode The unfusible WAIT node to process
*
* \return true if the WAIT node should be skipped (advance currentNodeId),
* false if the stream should be suspended (keep currentNodeId)
*
* Processing logic:
* 1. Find the corresponding NOTIFY node
* 2. If NOTIFY node has already been visited: skip the WAIT node and add to processedNodes_
* 3. If NOTIFY node has not been visited: suspend the stream and keep the WAIT node
* as the current node, so it will be processed again later when the NOTIFY is visited
*/
bool InitialScopeSplitPass::ProcessUnfusibleWaitNode(uint32_t streamIdx, SuperKernelBaseNode* waitNode) {
uint64_t notifyId = waitNode->GetCorrespondingNotifyNodeId();
if (notifyId == INVALID_TASK_ID) {
SK_LOGI("Stream %u: Wait node %s's notify node %lu not found in graph",
streamIdx, waitNode->Format().c_str(), notifyId);
return true;
}
SuperKernelBaseNode* notifyNode = graph_.GetNodeById(notifyId);
if (notifyNode == nullptr) {
SK_LOGE("Stream %u: Wait node %s's notify node %lu not found",
streamIdx, waitNode->Format().c_str(), notifyId);
return true;
}
if (visitedNotifies_.find(notifyId) != visitedNotifies_.end()) {
processedNodes_.insert(waitNode->GetNodeId());
SK_LOGI("Stream %u: Unfusible wait node %s: notify %lu already visited, skipping and added to processedNodes_",
streamIdx, waitNode->Format().c_str(), notifyId);
return true;
} else {
StreamState& state = streamStates_[streamIdx];
uint64_t eventId = notifyNode->GetEventId();
state.isSuspended = true;
state.waitingForNotify = eventId;
SK_LOGI("Stream %u: Unfusible wait node %s: notify %lu not visited, suspending stream (eventId=0x%lx)",
streamIdx, waitNode->Format().c_str(), notifyId, eventId);
return false;
}
}
bool InitialScopeSplitPass::DetermineCurrentScopeBitFlags() {
SK_LOGI("Starting to determine scope bit flags");
uint64_t minNodeIdx = UINT64_MAX;
SuperKernelBaseNode* minNode = nullptr;
uint32_t activeStreams = 0;
for (const auto& pair : streamStates_) {
uint32_t streamIdx = pair.first;
if (!pair.second.isTerminated && !pair.second.isSuspended &&
pair.second.currentNodeId != INVALID_TASK_ID) {
activeStreams++;
if (pair.second.currentNodeId < minNodeIdx) {
SuperKernelBaseNode* node = graph_.GetNodeById(pair.second.currentNodeId);
if (node != nullptr && node->IsFusible()) {
minNodeIdx = pair.second.currentNodeId;
minNode = node;
SK_LOGI("Stream %u: candidate min node %s (nodeIdx=%lu)",
streamIdx, node->Format().c_str(), minNodeIdx);
}
}
}
}
if (minNode != nullptr) {
currentScopeBitFlags_ = minNode->GetScopeBitFlags();
std::string scopeNames = ScopeSplitPass::GetScopeNamesFromBitFlags(currentScopeBitFlags_, graph_);
SK_LOGI("Found min node %s, scopeBitFlags=%s, scopeNames=[%s], active streams=%u",
minNode->Format().c_str(),
graph_.BitsetToString(currentScopeBitFlags_).c_str(),
scopeNames.c_str(), activeStreams);
return true;
}
LogFusibleNodeSearchResult();
return false;
}
void InitialScopeSplitPass::InitNodeHeap() {
SK_LOGI("[SplitScope] initializing node heap for current scope");
nodeHeap_.reset();
size_t addedNodes = 0;
for (auto& pair : streamStates_) {
TryAddNodeToHeap(pair.first);
}
SK_LOGI("[SplitScope] node heap initialized with %zu nodes", nodeHeap_.size());
}
void InitialScopeSplitPass::TryAddNodeToHeap(uint32_t streamIdx) {
StreamState& state = streamStates_[streamIdx];
if (state.isTerminated || state.isSuspended || state.currentNodeId == INVALID_TASK_ID) {
SK_LOGI("Stream %u: skipped (terminated=%d, suspended=%d, currentNodeId=%lu)",
streamIdx, state.isTerminated, state.isSuspended, state.currentNodeId);
return;
}
SuperKernelBaseNode* node = graph_.GetNodeById(state.currentNodeId);
if (node == nullptr) {
SK_LOGE("Stream %u: node %lu not found, terminating stream (graph integrity error: nodeId not in graph)",
streamIdx, state.currentNodeId);
state.currentNodeId = INVALID_TASK_ID;
return;
}
if (processedNodes_.find(state.currentNodeId) != processedNodes_.end()) {
SK_LOGI("Stream %u: node %lu already processed, advancing",
streamIdx, state.currentNodeId);
state.currentNodeId = node->GetNextNodeId();
TryAddNodeToHeap(streamIdx);
return;
}
if (!node->IsFusible()) {
SK_LOGI("Stream %u: node %s is not fusible, terminating stream",
streamIdx, node->Format().c_str());
state.isTerminated = true;
currentScope_->MutableBreakInfo()
.SetReason(ScopeBreakReason::UNFUSIBLE_NODE)
.SetTriggerNode(node->GetNodeId(), streamIdx)
.SetFusionFailReason(node->GetFusionFailReasonInfo())
.SetDetail("unfused node causes scope break");
SK_LOGI("Current scope has %zu nodes", currentScope_->GetNodes().size());
std::string detail = "Node " + std::to_string(node->GetNodeId()) + " is unfusible: " +
FusionFailReasonToStr(node->GetFusionFailReasonInfo());
return;
}
if (node->GetScopeBitFlags() != currentScopeBitFlags_) {
SK_LOGI("Stream %u: node %s scopeBitFlags mismatch, terminating stream",
streamIdx, node->Format().c_str());
state.isTerminated = true;
return;
}
if (node->GetNodeType() == SkNodeType::NODE_WAIT) {
HandleWaitNode(node, streamIdx);
return;
}
nodeHeap_.push(state.currentNodeId);
SK_LOGI("Stream %u: added node %s to heap, heap size=%zu",
streamIdx, node->Format().c_str(), nodeHeap_.size());
}
void InitialScopeSplitPass::HandleWaitNode(SuperKernelBaseNode* waitNode, uint32_t streamIdx) {
StreamState& state = streamStates_[streamIdx];
uint64_t notifyId = waitNode->GetCorrespondingNotifyNodeId();
if (notifyId == INVALID_TASK_ID) {
nodeHeap_.push(state.currentNodeId);
SK_LOGI("Stream %u: Wait node %s's notify node %lu not found, adding wait node to heap to avoid hanging",
streamIdx, waitNode->Format().c_str(), notifyId);
return;
}
SuperKernelBaseNode* notifyNode = graph_.GetNodeById(notifyId);
if (notifyNode == nullptr) {
SK_LOGE("Stream %u: Wait node %s: failed to get notify node by id %lu",
streamIdx, waitNode->Format().c_str(), notifyId);
return;
}
uint64_t eventId = notifyNode->GetEventId();
if (visitedNotifies_.find(notifyId) != visitedNotifies_.end()) {
nodeHeap_.push(state.currentNodeId);
SK_LOGI("Stream %u: Wait node %s: notify %lu already visited, adding to heap",
streamIdx, waitNode->Format().c_str(), notifyId);
} else {
state.isSuspended = true;
state.waitingForNotify = eventId;
SK_LOGI("Stream %u: Wait node %s: notify %lu not visited, suspending stream (eventId=0x%lx)",
streamIdx, waitNode->Format().c_str(), notifyId, eventId);
}
}
void InitialScopeSplitPass::ProcessNotifyNode(SuperKernelBaseNode* notifyNode) {
uint64_t eventId = notifyNode->GetEventId();
uint64_t notifyNodeId = notifyNode->GetNodeId();
const EventInfos* eventInfo = graph_.GetEventInfo(eventId);
if (eventInfo == nullptr) {
SK_LOGE("Event info for eventId=0x%lx not found (notifyNode: %s)",
eventId, notifyNode->Format().c_str());
return;
}
visitedNotifies_.insert(notifyNodeId);
SK_LOGI("Notify node %s (eventId=0x%lx) added to visited, %zu wait nodes to check",
notifyNode->Format().c_str(), eventId, eventInfo->waitNodeIdList.size());
uint32_t resumedCount = 0;
for (uint64_t waitNodeId : eventInfo->waitNodeIdList) {
SuperKernelBaseNode* waitNode = graph_.GetNodeById(waitNodeId);
if (waitNode == nullptr) {
SK_LOGE("Wait node %lu not found (graph integrity error: eventId=0x%lx references non-existent node)",
waitNodeId, eventId);
continue;
}
uint32_t streamIdx = waitNode->GetStreamIdxInGraph();
StreamState& state = streamStates_[streamIdx];
if (state.isSuspended && state.waitingForNotify == eventId) {
state.isSuspended = false;
state.waitingForNotify = INVALID_TASK_ID;
TryAddNodeToHeap(streamIdx);
resumedCount++;
SK_LOGI("Resumed stream %u for wait node %s",
streamIdx, waitNode->Format().c_str());
}
}
SK_LOGI("Resumed %u suspended streams", resumedCount);
}
void InitialScopeSplitPass::ProcessResetNode(SuperKernelBaseNode* resetNode) {
uint64_t eventId = resetNode->GetEventId();
const EventInfos* eventInfo = graph_.GetEventInfo(eventId);
if (eventInfo == nullptr) {
SK_LOGE("Event info for eventId=0x%lx not found (resetNode: %s)",
eventId, resetNode->Format().c_str());
return;
}
if (eventInfo->notifyNodeId != INVALID_TASK_ID) {
visitedNotifies_.erase(eventInfo->notifyNodeId);
SK_LOGI("Erased notify %lu from visited (eventId=0x%lx)",
eventInfo->notifyNodeId, eventId);
}
uint32_t suspendedCount = 0;
for (uint64_t waitNodeId : eventInfo->waitNodeIdList) {
SuperKernelBaseNode* waitNode = graph_.GetNodeById(waitNodeId);
if (waitNode == nullptr) {
SK_LOGE("Wait node %lu not found (graph integrity error: eventId=0x%lx references non-existent node)",
waitNodeId, eventId);
continue;
}
uint32_t streamIdx = waitNode->GetStreamIdxInGraph();
if (streamStates_[streamIdx].waitingForNotify == eventId) {
streamStates_[streamIdx].waitingForNotify = INVALID_TASK_ID;
streamStates_[streamIdx].isSuspended = true;
suspendedCount++;
SK_LOGE("Suspended stream %u for wait node %s (eventId=0x%lx)",
streamIdx, waitNode->Format().c_str(), eventId);
}
}
SK_LOGE("Suspended %u streams due to reset", suspendedCount);
}
bool InitialScopeSplitPass::HandleUnfusibleNotifyNode(SuperKernelBaseNode* notifyNode, uint32_t streamIdx) {
visitedNotifies_.insert(notifyNode->GetNodeId());
processedNodes_.insert(notifyNode->GetNodeId());
SK_LOGI("Stream %u: Unfusible notify node %s added to visitedNotifies and processedNodes",
streamIdx, notifyNode->Format().c_str());
return ResumeSuspendedWaitStreams(notifyNode, streamIdx);
}
bool InitialScopeSplitPass::ResumeSuspendedWaitStreams(SuperKernelBaseNode* notifyNode, uint32_t notifyStreamIdx) {
std::vector<uint64_t> waitNodeIds = notifyNode->GetCorrespondingWaitNodeIds();
uint64_t eventId = notifyNode->GetEventId();
for (uint64_t waitNodeId : waitNodeIds) {
SuperKernelBaseNode* waitNode = graph_.GetNodeById(waitNodeId);
if (waitNode == nullptr) {
SK_LOGE("Stream %u: Notify node %s's wait node %lu not found",
notifyStreamIdx, notifyNode->Format().c_str(), waitNodeId);
return false;
}
uint32_t waitStreamIdx = waitNode->GetStreamIdxInGraph();
const StreamState& waitStreamState = streamStates_[waitStreamIdx];
if (waitStreamState.isSuspended && waitStreamState.waitingForNotify == eventId) {
SK_LOGI("Stream %u: Notify node %s visited, processing suspended wait stream %u (wait node %s)",
notifyStreamIdx, notifyNode->Format().c_str(), waitStreamIdx,
waitNode->Format().c_str());
StreamState& mutableWaitState = streamStates_[waitStreamIdx];
mutableWaitState.isSuspended = false;
mutableWaitState.waitingForNotify = INVALID_TASK_ID;
if (!SkipUnfusibleNodesForStream(waitStreamIdx)) {
SK_LOGE("Stream %u: Failed to skip unfusible nodes for suspended wait stream %u (wait node %s)",
notifyStreamIdx, waitStreamIdx, waitNode->Format().c_str());
return false;
}
}
}
return true;
}
void InitialScopeSplitPass::LogFusibleNodeSearchResult() {
SK_LOGI("[FindFusibleNodes] No fusible nodes found in any stream");
bool allStreamsFinished = true;
std::vector<uint32_t> nonFinishedStreamIdxs;
for (const auto& pair : streamStates_) {
uint32_t streamIdx = pair.first;
const StreamState& state = pair.second;
if (state.currentNodeId != INVALID_TASK_ID) {
allStreamsFinished = false;
nonFinishedStreamIdxs.push_back(streamIdx);
}
}
if (allStreamsFinished) {
SK_LOGI("[FindFusibleNodes] All streams have finished (all currentNodeId are INVALID_TASK_ID)");
} else {
SK_LOGE("[FindFusibleNodes] Found %zu streams with non-INVALID currentNodeId but no fusible nodes",
nonFinishedStreamIdxs.size());
for (uint32_t streamIdx : nonFinishedStreamIdxs) {
const StreamState& state = streamStates_.at(streamIdx);
std::string stateInfo = state.FormatStreamStateInfo();
SK_LOGE("[FindFusibleNodes] Stream %u state: %s", streamIdx, stateInfo.c_str());
if (state.currentNodeId != INVALID_TASK_ID) {
SuperKernelBaseNode* node = graph_.GetNodeById(state.currentNodeId);
if (node != nullptr) {
bool isFusible = node->IsFusible();
SkNodeType nodeType = node->GetNodeType();
std::string scopeBitFlagsStr = graph_.BitsetToString(node->GetScopeBitFlags());
SK_LOGE("[FindFusibleNodes] Stream %u current node: %s, isFusible=%d, nodeType=%d, scopeBitFlags=%s",
streamIdx, node->Format().c_str(), isFusible,
static_cast<int>(nodeType), scopeBitFlagsStr.c_str());
}
}
}
}
}
void InitialScopeSplitPass::AddStreamInfoToScope(SuperKernelScopeInfo& scopeInfo, SuperKernelBaseNode* node) {
uint32_t streamIdx = node->GetStreamIdxInGraph();
auto scopeStreamInfos = scopeInfo.GetScopeStreamInfos();
auto it = std::find_if(scopeStreamInfos.begin(), scopeStreamInfos.end(),
[streamIdx](const ScopeStreamInfo& info) {
return info.streamIdx == streamIdx;
});
if (it == scopeStreamInfos.end()) {
ScopeStreamInfo newInfo;
newInfo.streamIdx = streamIdx;
newInfo.headNodeIdx = node->GetNodeId();
newInfo.tailNodeIdx = node->GetNodeId();
newInfo.nodeSize = 1;
scopeStreamInfos.push_back(std::move(newInfo));
} else {
it->tailNodeIdx = node->GetNodeId();
it->nodeSize++;
}
scopeInfo.SetScopeStreamInfos(std::move(scopeStreamInfos));
}
bool InitialScopeSplitPass::BuildCurrentScope(SuperKernelScopeInfo& scopeInfo) {
SK_LOGI("[SplitScope] starting to build current scope");
scopeInfo.SetScopeBitFlags(currentScopeBitFlags_);
currentScope_ = &scopeInfo;
while (!nodeHeap_.empty()) {
uint64_t nodeId = nodeHeap_.pop();
SuperKernelBaseNode* node = graph_.GetNodeById(nodeId);
if (node == nullptr) {
SK_LOGE("Node %lu not found in graph (graph integrity error)", nodeId);
return false;
}
if (processedNodes_.find(nodeId) != processedNodes_.end()) {
continue;
}
uint32_t streamIdx = node->GetStreamIdxInGraph();
scopeInfo.AddNode(node);
SK_LOGI("Added node %s to scope (count=%zu)",
node->Format().c_str(), scopeInfo.GetNodes().size());
AddStreamInfoToScope(scopeInfo, node);
processedNodes_.insert(nodeId);
uint64_t nextNodeId = node->GetNextNodeId();
streamStates_[streamIdx].currentNodeId = nextNodeId;
if (node->GetNodeType() == SkNodeType::NODE_NOTIFY) {
ProcessNotifyNode(node);
} else if (node->GetNodeType() == SkNodeType::NODE_RESET) {
ProcessResetNode(node);
}
TryAddNodeToHeap(streamIdx);
}
SK_LOGI("[SplitScope] built scope with %zu nodes, %zu stream infos",
scopeInfo.GetNodes().size(), scopeInfo.GetScopeStreamInfos().size());
currentScope_ = nullptr;
return true;
}
bool InitialScopeSplitPass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[SplitScope] %s pass starting execution", GetName().c_str());
scopes.clear();
processedNodes_.clear();
visitedNotifies_.clear();
streamBreakInfos_.clear();
currentScopeBreakInfo_ = ScopeBreakInfo();
scopeStartBreakInfo_ = ScopeBreakInfo();
SK_LOGI("%s: Initializing stream states", GetName().c_str());
InitStreamStates();
if (!SkipUnfusibleNodes()) {
SK_LOGE("%s: Failed to skip unfusible nodes during initialization (graph integrity error)",
GetName().c_str());
return false;
}
size_t scopeCount = 0;
while (true) {
if (!DetermineCurrentScopeBitFlags()) {
SK_LOGI("%s: No more fusible nodes, initial scope split complete", GetName().c_str());
break;
}
InitNodeHeap();
if (nodeHeap_.empty()) {
SK_LOGI("%s: Node heap is empty, initial scope split complete", GetName().c_str());
break;
}
SuperKernelScopeInfo scopeInfo;
if (BuildCurrentScope(scopeInfo)) {
if (!scopeInfo.GetNodes().empty()) {
std::string scopeNames = ScopeSplitPass::GetScopeNamesFromBitFlags(scopeInfo.GetScopeBitFlags(), graph_);
SK_LOGI("%s: Built scope %zu with %zu nodes, %zu streams, scopeNames=[%s], breakReason=[%s]",
GetName().c_str(), scopeCount, scopeInfo.GetNodes().size(),
scopeInfo.GetScopeStreamInfos().size(), scopeNames.c_str(),
scopeInfo.GetBreakInfo().Format().c_str());
scopes.push_back(std::move(scopeInfo));
} else {
std::string scopeNames = ScopeSplitPass::GetScopeNamesFromBitFlags(scopeInfo.GetScopeBitFlags(), graph_);
SK_LOGI("%s: Empty scope %zu skipped, scopeNames=[%s], continuing",
GetName().c_str(), scopeCount, scopeNames.c_str());
}
scopeCount++;
} else {
SK_LOGE("%s: Failed to build scope %zu (graph integrity error), stopping scope split",
GetName().c_str(), scopeCount);
return false;
}
SK_LOGI("%s: Resetting stream states for next scope", GetName().c_str());
if (!ResetStreamStates()) {
SK_LOGE("%s: Failed to reset stream states (graph integrity error), stopping scope split",
GetName().c_str());
return false;
}
}
SK_LOGI("[SplitScopeResult] %s pass completed, total scopes generated: %zu", GetName().c_str(), scopes.size());
PrintScopeResults(scopes, graph_, GetName().c_str());
return true;
}
DeadlockRefinePass::DeadlockRefinePass(SuperKernelGraph& inputGraph, SuperKernelOptionsManager& opts)
: ScopeSplitPass(inputGraph), lockDetector_(graph_, opts)
{}
bool DeadlockRefinePass::FindDeadlockInScope(const SuperKernelScopeInfo& scope,
SuperKernelBaseNode** deadlockNode,
SuperKernelBaseNode** deadlockWaitNode) {
const auto& nodes = scope.GetNodes();
SK_LOGI("[DeadlockRefine] checking scope with %zu nodes for deadlock", nodes.size());
lockDetector_.Reset();
SuperKernelBaseNode* lastWaitNode = nullptr;
for (size_t i = 0; i < nodes.size(); ++i) {
const auto* node = nodes[i];
if (node->GetNodeType() == SkNodeType::NODE_WAIT) {
lastWaitNode = const_cast<SuperKernelBaseNode*>(node);
SK_LOGI("Found Wait node %s at position %zu",
node->Format().c_str(), i);
}
if (!lockDetector_.IsFusible(*const_cast<SuperKernelBaseNode*>(node))) {
*deadlockNode = const_cast<SuperKernelBaseNode*>(node);
*deadlockWaitNode = lastWaitNode;
SK_LOGI("Deadlock detected at node %s (position %zu)",
node->Format().c_str(), i);
if (lastWaitNode != nullptr) {
SK_LOGI(" Nearest Wait node before deadlock: %s",
lastWaitNode->Format().c_str());
}
return true;
}
SK_LOGI("Node %s passed deadlock check", node->Format().c_str());
}
SK_LOGI("[DeadlockRefine] no deadlock found in scope");
return false;
}
void DeadlockRefinePass::SplitScopeAtWaitNode(const SuperKernelScopeInfo& scope,
SuperKernelBaseNode* waitNode,
SuperKernelScopeInfo& scopeBefore,
SuperKernelScopeInfo& scopeAfter) {
scopeBefore.SetScopeBitFlags(scope.GetScopeBitFlags());
scopeAfter.SetScopeBitFlags(scope.GetScopeBitFlags());
bool foundWait = false;
for (const auto* node : scope.GetNodes()) {
if (node == waitNode) {
foundWait = true;
continue;
}
if (!foundWait) {
scopeBefore.AddNode(const_cast<SuperKernelBaseNode*>(node));
} else {
scopeAfter.AddNode(const_cast<SuperKernelBaseNode*>(node));
}
}
RebuildStreamInfos(scopeBefore);
RebuildStreamInfos(scopeAfter);
}
* @brief Setup break info for scopeAfter after deadlock split
*/
static void SetupScopeBeforeBreakInfo(SuperKernelScopeInfo& scopeBefore,
const ScopeBreakInfo& originalBreakInfo,
uint16_t originalScopeId,
SuperKernelBaseNode* deadlockNode,
SuperKernelBaseNode* deadlockWaitNode,
bool hasSameKernelAsOriginal) {
if (hasSameKernelAsOriginal) {
scopeBefore.MutableBreakInfo() = originalBreakInfo;
scopeBefore.MutableBreakInfo().SetParentScopeId(originalScopeId);
SK_LOGI("[DeadlockRefine] scopeBefore kernel same as original, inherits break info");
} else {
scopeBefore.MutableBreakInfo()
.SetReason(ScopeBreakReason::DEADLOCK_DETECTED)
.SetTriggerNode(deadlockNode->GetNodeId(), deadlockNode->GetStreamIdxInGraph())
.SetFusionFailReason(FusionFailReason::EXIST_DEADLOCK)
.SetDetail("Deadlock at node " + std::to_string(deadlockNode->GetNodeId()) +
", split at Wait node " + std::to_string(deadlockWaitNode->GetNodeId()));
SK_LOGI("[DeadlockRefine] scopeBefore kernel different, gets deadlock break info");
}
SK_LOGI("[DeadlockRefine] scopeBefore break info: %s", scopeBefore.GetBreakInfo().Format().c_str());
}
static void SetupScopeAfterBreakInfo(SuperKernelScopeInfo& scopeAfter,
const ScopeBreakInfo& originalBreakInfo,
uint16_t originalScopeId,
bool hasSameKernelAsOriginal) {
scopeAfter.MutableBreakInfo() = originalBreakInfo;
if (hasSameKernelAsOriginal) {
scopeAfter.MutableBreakInfo().SetParentScopeId(originalScopeId);
SK_LOGI("[DeadlockRefine] scopeAfter kernel same as original, inherits break info");
} else {
scopeAfter.MutableBreakInfo().SetParentScopeId(INVALID_SCOPE_ID);
SK_LOGI("[DeadlockRefine] scopeAfter kernel different, gets deadlock break info");
}
SK_LOGI("[DeadlockRefine] scopeAfter break info: %s", scopeAfter.GetBreakInfo().Format().c_str());
}
* @brief Handle deadlock split logic: split scope, set break info, and manage sub-scopes
* @param workingScope The scope to split
* @param deadlockNode The node where deadlock is detected
* @param deadlockWaitNode The wait node to split at
* @param outputScopes Output scopes list to store valid scopes
* @param pendingScope Output pending scope for further processing
* @return Processing result of deadlock resolution
*/
ScopeProcessResult DeadlockRefinePass::HandleDeadlockSplit(
SuperKernelScopeInfo& workingScope,
SuperKernelBaseNode* deadlockNode,
SuperKernelBaseNode* deadlockWaitNode,
std::vector<SuperKernelScopeInfo>& outputScopes,
std::optional<SuperKernelScopeInfo>& pendingScope)
{
const ScopeBreakInfo& originalBreakInfo = workingScope.GetBreakInfo();
uint16_t originalScopeId = workingScope.GetScopeId();
SuperKernelScopeInfo scopeBefore;
SuperKernelScopeInfo scopeAfter;
SplitScopeAtWaitNode(workingScope, deadlockWaitNode, scopeBefore, scopeAfter);
deadlockNode->SetFusionFailReason(FusionFailReason::EXIST_DEADLOCK);
SK_LOGI("[DeadlockRefine] Deadlock detected at node %s, splitting at Wait node %s",
deadlockNode->Format().c_str(), deadlockWaitNode->Format().c_str());
SK_LOGI("[DeadlockRefine] Before split: original=%zu nodes, scopeBefore=%zu, scopeAfter=%zu",
workingScope.GetNodes().size(), scopeBefore.GetNodes().size(), scopeAfter.GetNodes().size());
bool hasSameKernelAsOriginal = HasSameKernelNodes(workingScope, scopeBefore);
SetupScopeBeforeBreakInfo(scopeBefore, originalBreakInfo, originalScopeId,
deadlockNode, deadlockWaitNode, hasSameKernelAsOriginal);
hasSameKernelAsOriginal = HasSameKernelNodes(workingScope, scopeAfter);
SetupScopeAfterBreakInfo(scopeAfter, originalBreakInfo, originalScopeId,
hasSameKernelAsOriginal);
if (!scopeBefore.GetNodes().empty()) {
lockDetector_.SetNotifyNodesExpandNumForScope(scopeBefore);
outputScopes.push_back(std::move(scopeBefore));
} else {
SK_LOGI("[DeadlockRefine] scopeBefore is empty after split, no scope added before Wait node");
}
if (!scopeAfter.GetNodes().empty()) {
pendingScope = std::move(scopeAfter);
SK_LOGI("[DeadlockRefine] scopeAfter has %zu nodes and will be processed further",
pendingScope->GetNodes().size());
} else {
SK_LOGI("[DeadlockRefine] scopeAfter is empty, no further processing required for this scope");
}
return ScopeProcessResult::DEADLOCK_RESOLVED;
}
ScopeProcessResult DeadlockRefinePass::ProcessSingleScope(
SuperKernelScopeInfo&& scopeToProcess,
std::vector<SuperKernelScopeInfo>& outputScopes,
std::optional<SuperKernelScopeInfo>& pendingScope) {
pendingScope.reset();
SuperKernelScopeInfo workingScope = std::move(scopeToProcess);
if (workingScope.GetNodes().empty()) {
SK_LOGI("[DeadlockRefine] ProcessSingleScope called with empty scope, nothing to do");
return ScopeProcessResult::NO_DEADLOCK;
}
SuperKernelBaseNode* deadlockNode = nullptr;
SuperKernelBaseNode* deadlockWaitNode = nullptr;
if (!FindDeadlockInScope(workingScope, &deadlockNode, &deadlockWaitNode)) {
lockDetector_.SetNotifyNodesExpandNumForScope(workingScope);
SK_LOGI("[DeadlockRefine] Scope has no deadlock, added as a whole with %zu nodes",
workingScope.GetNodes().size());
outputScopes.push_back(std::move(workingScope));
return ScopeProcessResult::NO_DEADLOCK;
}
if (deadlockWaitNode == nullptr) {
SK_LOGE("[DeadlockRefine] Deadlock at node %s but no Wait node found to split; refinement failed",
deadlockNode != nullptr ? deadlockNode->Format().c_str() : "<null>");
return ScopeProcessResult::DEADLOCK_UNRESOLVED;
}
return HandleDeadlockSplit(workingScope, deadlockNode, deadlockWaitNode, outputScopes, pendingScope);
}
bool DeadlockRefinePass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[DeadlockRefine] %s pass starting execution", GetName().c_str());
SK_LOGI("[DeadlockRefine] input scopes count: %zu", scopes.size());
std::vector<SuperKernelScopeInfo> refinedScopes;
size_t splitCount = 0;
for (size_t i = 0; i < scopes.size(); ++i) {
SK_LOGI("[DeadlockRefine] Processing scope index %zu with %zu nodes", i, scopes[i].GetNodes().size());
SuperKernelScopeInfo currentScope = std::move(scopes[i]);
std::optional<SuperKernelScopeInfo> pendingScope;
while (true) {
ScopeProcessResult result = ProcessSingleScope(std::move(currentScope), refinedScopes, pendingScope);
if (result == ScopeProcessResult::DEADLOCK_UNRESOLVED) {
SK_LOGE("[DeadlockRefine] Scope %zu: deadlock detected and cannot be resolved, aborting refinement",
i);
scopes.clear();
return false;
}
if (result == ScopeProcessResult::DEADLOCK_RESOLVED) {
splitCount++;
}
if (!pendingScope.has_value()) {
break;
}
currentScope = std::move(*pendingScope);
pendingScope.reset();
}
}
scopes = std::move(refinedScopes);
for (auto& scope : scopes) {
lockDetector_.ResetNotifyExpandNumForScope(scope);
}
lockDetector_.Reset();
SK_LOGI("[DeadlockRefine] %s pass completed, split %zu scopes, total scopes: %zu",
GetName().c_str(), splitCount, scopes.size());
PrintScopeResults(scopes, graph_, GetName().c_str());
return true;
}
ScheModeKernelSplitPass::ScheModeKernelSplitPass(SuperKernelGraph& inputGraph)
: ScopeSplitPass(inputGraph) {}
void ScheModeKernelSplitPass::SplitScopeAtNode(const SuperKernelScopeInfo& scope,
SuperKernelBaseNode* splitNode,
SuperKernelScopeInfo& scopeBefore,
SuperKernelScopeInfo& scopeAfter) {
scopeBefore.SetScopeBitFlags(scope.GetScopeBitFlags());
scopeAfter.SetScopeBitFlags(scope.GetScopeBitFlags());
bool foundSplit = false;
for (const auto* node : scope.GetNodes()) {
if (node == splitNode) {
foundSplit = true;
}
if (!foundSplit) {
scopeBefore.AddNode(const_cast<SuperKernelBaseNode*>(node));
} else {
scopeAfter.AddNode(const_cast<SuperKernelBaseNode*>(node));
}
}
RebuildStreamInfos(scopeBefore);
RebuildStreamInfos(scopeAfter);
}
static void SetupScheModeScopeBeforeBreakInfo(SuperKernelScopeInfo& scopeBefore,
const ScopeBreakInfo& originalBreakInfo,
uint16_t originalScopeId,
SuperKernelBaseNode* splitNode,
ScopeBreakReason breakReason,
uint32_t mergedCube, uint32_t mergedVec,
uint32_t curCube, uint32_t curVec,
bool hasSameKernelAsOriginal)
{
if (hasSameKernelAsOriginal) {
scopeBefore.MutableBreakInfo() = originalBreakInfo;
scopeBefore.MutableBreakInfo().SetParentScopeId(originalScopeId);
SK_LOGI("[ScheModeSplit] scopeBefore kernel same as original, inherits break info");
} else {
bool isDrop = (breakReason == ScopeBreakReason::SCHEMODE_CORE_DROP);
std::string detail = "ScheMode core " + std::string(isDrop ? "drop" : "rise") +
" at node " + std::to_string(splitNode->GetNodeId()) +
": merged(" + std::to_string(mergedCube) + "," + std::to_string(mergedVec) + ")" +
" -> cur(" + std::to_string(curCube) + "," + std::to_string(curVec) + ")";
scopeBefore.MutableBreakInfo()
.SetReason(breakReason)
.SetTriggerNode(splitNode->GetNodeId(), splitNode->GetStreamIdxInGraph())
.SetDetail(detail);
SK_LOGI("[ScheModeSplit] scopeBefore kernel different, set ScheMode break info");
}
SK_LOGI("[ScheModeSplit] scopeBefore break info: %s", scopeBefore.GetBreakInfo().Format().c_str());
}
ScheModeScopeProcessResult ScheModeKernelSplitPass::ProcessSingleScope(
SuperKernelScopeInfo&& scopeToProcess,
std::vector<SuperKernelScopeInfo>& outputScopes,
std::optional<SuperKernelScopeInfo>& pendingScope) {
pendingScope.reset();
SuperKernelScopeInfo workingScope = std::move(scopeToProcess);
if (workingScope.GetNodes().empty()) {
SK_LOGI("[ScheModeSplit] ProcessSingleScope called with empty scope, nothing to do");
return ScheModeScopeProcessResult::NO_SPLIT;
}
uint32_t mergedCubeNum = 0;
uint32_t mergedVecNum = 0;
bool hasMergedKernel = false;
bool hasMergedScheMode = false;
auto& nodes = workingScope.GetNodes();
for (size_t i = 0; i < nodes.size(); ++i) {
SuperKernelBaseNode* node = nodes[i];
if (node == nullptr || node->GetNodeType() != SkNodeType::NODE_KERNEL) {
continue;
}
uint32_t curCubeNum = node->GetCubeNum();
uint32_t curVecNum = node->GetVecNum();
if (node->IsScheModeOn()) {
if (curVecNum == 0) {
curVecNum = curCubeNum * 2;
}
if (curCubeNum == 0) {
curCubeNum = (curVecNum + 1) / 2;
}
}
if (!hasMergedKernel) {
mergedCubeNum = curCubeNum;
mergedVecNum = curVecNum;
hasMergedKernel = true;
hasMergedScheMode = node->IsScheModeOn();
continue;
}
bool needSplit = false;
ScopeBreakReason breakReason = ScopeBreakReason::NONE;
if (node->IsScheModeOn()) {
const bool isCubeSmallerThanMerged = (curCubeNum != 0) && (curCubeNum < mergedCubeNum);
const bool isVecSmallerThanMerged = (curVecNum != 0) && (curVecNum < mergedVecNum);
if (isCubeSmallerThanMerged || isVecSmallerThanMerged) {
needSplit = true;
breakReason = ScopeBreakReason::SCHEMODE_CORE_DROP;
}
}
if (!needSplit && hasMergedScheMode) {
const bool isCubeBiggerThanMerged = (curCubeNum != 0) && (curCubeNum > mergedCubeNum);
const bool isVecBiggerThanMerged = (curVecNum != 0) && (curVecNum > mergedVecNum);
if (isCubeBiggerThanMerged || isVecBiggerThanMerged) {
needSplit = true;
breakReason = ScopeBreakReason::SCHEMODE_CORE_RISE;
}
}
if (needSplit) {
const ScopeBreakInfo& originalBreakInfo = workingScope.GetBreakInfo();
uint16_t originalScopeId = workingScope.GetScopeId();
SuperKernelScopeInfo scopeBefore;
SuperKernelScopeInfo scopeAfter;
SplitScopeAtNode(workingScope, node, scopeBefore, scopeAfter);
SK_LOGI("[ScheModeSplit] split required at kernel %s, mergedCore={cube:%u, vec:%u}, "
"currentCore={cube:%u, vec:%u}, reason=%s, scopeBefore=%zu, scopeAfter=%zu",
node->Format().c_str(),
mergedCubeNum, mergedVecNum,
curCubeNum, curVecNum,
ScopeBreakReasonToStr(breakReason),
scopeBefore.GetNodes().size(), scopeAfter.GetNodes().size());
bool hasSameKernelAsOriginal = HasSameKernelNodes(workingScope, scopeBefore);
SetupScheModeScopeBeforeBreakInfo(scopeBefore, originalBreakInfo,
originalScopeId, node, breakReason,
mergedCubeNum, mergedVecNum, curCubeNum, curVecNum,
hasSameKernelAsOriginal);
SK_LOGI("[ScheModeSplit] scopeBefore break info: %s", scopeBefore.GetBreakInfo().Format().c_str());
hasSameKernelAsOriginal = HasSameKernelNodes(workingScope, scopeAfter);
SetupScopeAfterBreakInfo(scopeAfter, originalBreakInfo, originalScopeId, hasSameKernelAsOriginal);
SK_LOGI("[ScheModeSplit] scopeAfter break info: %s", scopeAfter.GetBreakInfo().Format().c_str());
if (!scopeBefore.GetNodes().empty()) {
outputScopes.push_back(std::move(scopeBefore));
}
if (!scopeAfter.GetNodes().empty()) {
pendingScope = std::move(scopeAfter);
}
return ScheModeScopeProcessResult::SPLIT_RESOLVED;
}
mergedCubeNum = std::max(mergedCubeNum, curCubeNum);
mergedVecNum = std::max(mergedVecNum, curVecNum);
hasMergedScheMode = hasMergedScheMode || node->IsScheModeOn();
}
outputScopes.push_back(std::move(workingScope));
return ScheModeScopeProcessResult::NO_SPLIT;
}
bool ScheModeKernelSplitPass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[ScheModeSplit] %s pass starting execution", GetName().c_str());
SK_LOGI("[ScheModeSplit] input scopes count: %zu", scopes.size());
std::vector<SuperKernelScopeInfo> resScopes;
size_t splitCount = 0;
for (size_t i = 0; i < scopes.size(); ++i) {
SK_LOGI("[ScheModeSplit] Processing scope index %zu with %zu nodes", i, scopes[i].GetNodes().size());
SuperKernelScopeInfo currentScope = std::move(scopes[i]);
std::optional<SuperKernelScopeInfo> pendingScope;
while (true) {
ScheModeScopeProcessResult result = ProcessSingleScope(std::move(currentScope),
resScopes, pendingScope);
if (result == ScheModeScopeProcessResult::SPLIT_RESOLVED) {
splitCount++;
}
if (!pendingScope.has_value()) {
break;
}
currentScope = std::move(*pendingScope);
pendingScope.reset();
}
}
scopes = std::move(resScopes);
SK_LOGI("[ScheModeSplit] %s pass completed, split %zu scopes, total scopes: %zu",
GetName().c_str(), splitCount, scopes.size());
PrintScopeResults(scopes, graph_, GetName().c_str());
return true;
}
SuperKernelScopeSplitter::SuperKernelScopeSplitter(SuperKernelGraph& inputGraph, SuperKernelOptionsManager& opts)
: graph_(inputGraph), opts_(&opts) {
const auto* option = static_cast<const AggressiveOptStrategiesOption*>(
opts.GetOption(aclskOptionType::AGGRESSIVE_OPT_STRATEGIES));
enableTaskBreakerBypass_ = (option != nullptr && option->GetValue().taskBreakerBypass == 1);
auto autoOpParallel = opts.GetOption(aclskOptionType::AUTO_OP_PARALLEL);
SkHeapType heapType = SkHeapType::PRIORITY_QUEUE;
if (autoOpParallel != nullptr) {
heapType = static_cast<SkHeapType>(autoOpParallel->GetIntValue());
}
auto perOpMaxCoreOpt = opts.GetOption(aclskOptionType::DEBUG_PER_OP_MAX_CORE_NUM);
bool enablePerOpMaxCore = (perOpMaxCoreOpt != nullptr && perOpMaxCoreOpt->GetIntValue() == 1);
if (enablePerOpMaxCore) {
SK_LOGI("[ScopeSplitter] DEBUG_PER_OP_MAX_CORE_NUM enabled");
passes_.push_back(std::make_unique<PerOpMaxCoreSplitPass>(inputGraph));
} else {
if (enableTaskBreakerBypass_) {
SK_LOGI("[ScopeSplitter] taskBreakerBypass enabled");
}
passes_.push_back(std::make_unique<InitialScopeSplitPass>(inputGraph, heapType));
passes_.push_back(std::make_unique<ScheModeKernelSplitPass>(inputGraph));
passes_.push_back(std::make_unique<DeadlockRefinePass>(inputGraph, *opts_));
if (enableTaskBreakerBypass_) {
passes_.push_back(std::make_unique<DefaultNodeProcessPass>(inputGraph));
passes_.push_back(std::make_unique<DeadlockRefinePass>(inputGraph, *opts_));
}
passes_.push_back(std::make_unique<EventOnlyStreamRemovePass>(inputGraph));
}
for (auto& pass : passes_) {
pass->SetSplitter(this);
}
}
void SuperKernelScopeSplitter::InitDefaultNodeFusibility() {
if (!enableTaskBreakerBypass_) {
return;
}
SK_LOGI("[ScopeSplitter] Initializing default node fusibility for taskBreakerBypass");
uint32_t count = 0;
const auto& headNodes = graph_.GetHeadNodes();
for (uint64_t headNodeId : headNodes) {
uint64_t nodeId = headNodeId;
while (nodeId != INVALID_TASK_ID) {
SuperKernelBaseNode* node = graph_.GetNodeById(nodeId);
if (node == nullptr) {
break;
}
if (node->GetNodeType() == SkNodeType::NODE_DEFAULT) {
node->SetIsFusible(true);
count++;
SK_LOGI("[ScopeSplitter] Default node %s set isFusible=true", node->Format().c_str());
}
nodeId = node->GetNextNodeId();
}
}
SK_LOGI("[ScopeSplitter] Initialized %u default nodes as fusible", count);
}
bool SuperKernelScopeSplitter::SplitGraph() {
SK_LOGI("[ScopeSplitPipeline] starting scope splitting pipeline");
InitDefaultNodeFusibility();
scopeInfos_.clear();
needResplit_ = false;
const uint32_t maxIterations = 10;
uint32_t iteration = 0;
do {
iteration++;
needResplit_ = false;
scopeInfos_.clear();
SK_LOGI("[ScopeSplitPipeline] iteration %u", iteration);
{
SK_LOG_CONTEXT_SIMPLE("sk_scope_split.log");
SK_LOGI("==========[ScopeSplitPipeline] iteration %u==========", iteration);
}
for (auto& pass : passes_) {
SK_LOGI("[ScopeSplitPipeline] running pass: %s", pass->GetName().c_str());
{
SK_LOG_CONTEXT_SIMPLE("sk_scope_split.log");
SK_LOGI("===========[ScopeSplitPipeline] running pass: %s==========", pass->GetName().c_str());
}
if (!pass->Run(scopeInfos_)) {
SK_LOGE("[ScopeSplitPipeline] pass %s failed (input scopes: %zu)", pass->GetName().c_str(), scopeInfos_.size());
return false;
}
if (needResplit_) {
SK_LOGI("[ScopeSplitPipeline] re-split requested by %s, starting new iteration", pass->GetName().c_str());
break;
}
}
if (iteration >= maxIterations) {
SK_LOGW("[ScopeSplitPipeline] reached maximum iterations (%u), stopping", maxIterations);
break;
}
} while (needResplit_);
ScopeSplitPass::PrintScopeResults(scopeInfos_, graph_, "ScopeSplitPipelineFinal");
PrintScopeBreakReasonReport();
return true;
}
DefaultNodeProcessPass::DefaultNodeProcessPass(SuperKernelGraph& inputGraph)
: ScopeSplitPass(inputGraph) {}
std::unordered_map<uint32_t, StreamDefaultInfo> DefaultNodeProcessPass::CollectStreamInfo(
const SuperKernelScopeInfo& scope) {
std::unordered_map<uint32_t, StreamDefaultInfo> streamInfoMap;
for (const auto* node : scope.GetNodes()) {
uint32_t streamIdx = node->GetStreamIdxInGraph();
auto nodeType = node->GetNodeType();
if (nodeType == SkNodeType::NODE_KERNEL) {
streamInfoMap[streamIdx].hasKernel = true;
} else if (nodeType == SkNodeType::NODE_DEFAULT) {
streamInfoMap[streamIdx].defaults.push_back(const_cast<SuperKernelBaseNode*>(node));
}
}
return streamInfoMap;
}
void DefaultNodeProcessPass::MarkDefaultsUnfusible(const std::vector<SuperKernelBaseNode*>& defaultNodes) {
for (auto* node : defaultNodes) {
node->SetIsFusible(false);
node->SetFusionFailReason(FusionFailReason::DEFAULT_NODE);
SK_LOGI("[DefaultNodeProcess] Default %s marked unfusible (stream has kernel)",
node->Format().c_str());
}
}
void DefaultNodeProcessPass::RemoveDefaultsAndStreams(
SuperKernelScopeInfo& scope,
const std::vector<SuperKernelBaseNode*>& defaultsToRemove,
const std::unordered_set<uint32_t>& streamsToRemove) {
auto nodes = scope.GetNodes();
std::vector<SuperKernelBaseNode*> remaining;
for (auto* node : nodes) {
if (node->GetNodeType() == SkNodeType::NODE_DEFAULT) {
SK_LOGI("[DefaultNodeProcess] Removing default: %s", node->Format().c_str());
continue;
}
remaining.push_back(node);
}
scope.SetNodes(std::move(remaining));
for (uint32_t streamIdx : streamsToRemove) {
RemoveStreamFromScope(scope, streamIdx);
}
RebuildStreamInfos(scope);
}
void DefaultNodeProcessPass::RemoveStreamFromScope(SuperKernelScopeInfo& scope, uint32_t streamIdx) {
auto nodes = scope.GetNodes();
std::vector<SuperKernelBaseNode*> remaining;
for (auto* node : nodes) {
if (node->GetStreamIdxInGraph() != streamIdx) {
remaining.push_back(node);
} else {
SK_LOGI("[DefaultNodeProcess] Removing event node: %s (stream %u)", node->Format().c_str(), streamIdx);
}
}
scope.SetNodes(std::move(remaining));
}
uint32_t DefaultNodeProcessPass::ProcessSingleScope(SuperKernelScopeInfo& scope) {
auto streamInfoMap = CollectStreamInfo(scope);
std::vector<SuperKernelBaseNode*> defaultsToMark;
std::vector<SuperKernelBaseNode*> defaultsToRemove;
std::unordered_set<uint32_t> streamsToRemove;
for (const auto& [streamIdx, info] : streamInfoMap) {
if (info.defaults.empty()) {
continue;
}
if (info.hasKernel) {
defaultsToMark.insert(defaultsToMark.end(),
info.defaults.begin(), info.defaults.end());
} else {
defaultsToRemove.insert(defaultsToRemove.end(),
info.defaults.begin(), info.defaults.end());
streamsToRemove.insert(streamIdx);
}
}
if (defaultsToMark.empty() && defaultsToRemove.empty()) {
return 0;
}
SK_LOGI("[DefaultNodeProcess] Scope has %zu defaults (%zu to mark, %zu to remove)",
defaultsToMark.size() + defaultsToRemove.size(),
defaultsToMark.size(), defaultsToRemove.size());
if (!defaultsToMark.empty()) {
MarkDefaultsUnfusible(defaultsToMark);
SK_LOGI("[DefaultNodeProcess] %zu defaults marked unfusible", defaultsToMark.size());
return static_cast<uint32_t>(defaultsToMark.size());
}
RemoveDefaultsAndStreams(scope, defaultsToRemove, streamsToRemove);
SK_LOGI("[DefaultNodeProcess] %zu defaults removed", defaultsToRemove.size());
return 0;
}
bool DefaultNodeProcessPass::Run(std::vector<SuperKernelScopeInfo>& scopes) {
SK_LOGI("[DefaultNodeProcess] Run starting, scopes: %zu", scopes.size());
uint32_t totalMarked = 0;
for (size_t i = 0; i < scopes.size(); ++i) {
uint32_t marked = ProcessSingleScope(scopes[i]);
totalMarked += marked;
}
if (totalMarked > 0) {
SK_LOGI("[DefaultNodeProcess] %u defaults marked unfusible, triggering resplit", totalMarked);
scopes.clear();
RequestResplit();
return true;
}
std::vector<SuperKernelScopeInfo> valid;
for (size_t i = 0; i < scopes.size(); ++i) {
if (!scopes[i].GetNodes().empty()) {
valid.push_back(std::move(scopes[i]));
} else {
SK_LOGI("[DefaultNodeProcess] Scope %zu empty after removal, dropping", i);
}
}
scopes = std::move(valid);
SK_LOGI("[DefaultNodeProcess] Run completed, scopes: %zu", scopes.size());
PrintScopeResults(scopes, graph_, GetName().c_str());
return true;
}
std::string FormatNodeIds(const SuperKernelScopeInfo& scope)
{
std::string result;
for (const auto* node : scope.GetNodes()) {
if (node != nullptr) {
if (!result.empty()) result += ", ";
result += std::to_string(node->GetNodeId());
}
}
return result;
}
std::string FormatKernelIds(const std::vector<uint64_t>& kernelIds, size_t maxDisplay = SIZE_MAX)
{
std::string result;
size_t limit = (maxDisplay == SIZE_MAX) ? kernelIds.size() : maxDisplay;
for (size_t j = 0; j < kernelIds.size() && j < limit; ++j) {
if (j > 0) result += ", ";
result += std::to_string(kernelIds[j]);
}
if (kernelIds.size() > limit) result += "...";
return result;
}
std::unordered_map<uint16_t, size_t> BuildScopeIdIndexMap(const std::vector<SuperKernelScopeInfo>& scopeInfos)
{
std::unordered_map<uint16_t, size_t> map;
for (size_t i = 0; i < scopeInfos.size(); ++i) {
map[scopeInfos[i].GetScopeId()] = i;
}
return map;
}
void PrintScopeSummary(const SuperKernelScopeInfo& scope, size_t idx)
{
SK_LOGI("Scope %zu (scopeId=%u): %zu nodes", idx, scope.GetScopeId(), scope.GetNodes().size());
}
void PrintTriggerNode(const ScopeBreakInfo& breakInfo, SuperKernelGraph& graph)
{
if (breakInfo.GetTriggerNodeId() == INVALID_TASK_ID) return;
SuperKernelBaseNode* node = graph.GetNodeById(breakInfo.GetTriggerNodeId());
if (node != nullptr) {
SK_LOGI(" Trigger node: %s", node->Format().c_str());
}
}
void PrintKernelNodes(const std::vector<uint64_t>& kernelIds)
{
if (kernelIds.empty()) return;
SK_LOGI(" Kernel nodes: %zu (ids: %s)", kernelIds.size(), FormatKernelIds(kernelIds, 5).c_str());
}
std::unordered_map<ScopeBreakReason, size_t> CountRootBreakReasons(
const std::vector<SuperKernelScopeInfo>& scopeInfos,
const std::unordered_map<uint16_t, size_t>& scopeIdToIdx)
{
std::unordered_map<ScopeBreakReason, size_t> counts;
for (const auto& scope : scopeInfos) {
ScopeBreakInfo rootInfo = FindRootBreakInfo(scope, scopeIdToIdx, scopeInfos);
counts[rootInfo.GetReason()]++;
}
return counts;
}
void PrintRootBreakReasonSummary(const std::unordered_map<ScopeBreakReason, size_t>& rootReasonCounts)
{
SK_LOGI("Root Break Reason Summary:");
for (const auto& pair : rootReasonCounts) {
SK_LOGI(" %s: %zu scopes", ScopeBreakReasonToStr(pair.first), pair.second);
}
}
ScopeBreakInfo FindRootBreakInfo(const SuperKernelScopeInfo& scope,
const std::unordered_map<uint16_t, size_t>& scopeIdToIdx,
const std::vector<SuperKernelScopeInfo>& scopeInfos) {
const SuperKernelScopeInfo* currentScope = &scope;
while (currentScope->GetBreakInfo().GetParentScopeId() != INVALID_SCOPE_ID) {
auto it = scopeIdToIdx.find(currentScope->GetBreakInfo().GetParentScopeId());
if (it != scopeIdToIdx.end()) {
currentScope = &scopeInfos[it->second];
} else {
break;
}
}
return currentScope->GetBreakInfo();
}
void SuperKernelScopeSplitter::PrintAllScopesDetail(const std::vector<SuperKernelScopeInfo>& scopeInfos)
{
SK_LOGI("All Scopes Detail");
for (size_t i = 0; i < scopeInfos.size(); ++i) {
const auto& scope = scopeInfos[i];
std::string nodeIdsStr = FormatNodeIds(scope);
std::string kernelIdsStr = FormatKernelIds(ScopeSplitPass::GetKernelNodeIds(scope));
SK_LOGI(" [%zu] scopeId=%u, allNodeIds=[%s], kernelIds=[%s], breakReason=[%s]",
i, scope.GetScopeId(), nodeIdsStr.c_str(), kernelIdsStr.c_str(),
scope.GetBreakInfo().Format().c_str());
}
}
void SuperKernelScopeSplitter::PrintScopeBreakReasonReport()
{
SK_LOGI("Scope Break Reason Report");
SK_LOGI("Total scopes: %zu", scopeInfos_.size());
auto scopeIdToIdx = BuildScopeIdIndexMap(scopeInfos_);
PrintAllScopesDetail(scopeInfos_);
SK_LOGI("Scope Break Reason Analysis");
for (size_t i = 0; i < scopeInfos_.size(); ++i) {
const auto& scope = scopeInfos_[i];
const auto& breakInfo = scope.GetBreakInfo();
const auto& extInfo = scope.GetExtInfo();
PrintScopeSummary(scope, i);
SK_LOGI(" BreakInfo: %s", breakInfo.Format().c_str());
if (breakInfo.GetParentScopeId() != INVALID_SCOPE_ID){
ScopeBreakInfo rootInfo = FindRootBreakInfo(scope, scopeIdToIdx, scopeInfos_);
SK_LOGI(" Root BreakInfo: %s", rootInfo.Format().c_str());
}
PrintTriggerNode(breakInfo, graph_);
PrintKernelNodes(ScopeSplitPass::GetKernelNodeIds(scope));
}
auto rootReasonCounts = CountRootBreakReasons(scopeInfos_, scopeIdToIdx);
PrintRootBreakReasonSummary(rootReasonCounts);
SK_LOGI("End of Scope Break Reason Report");
}