* -------------------------------------------------------------------------
* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*/
#ifndef PROFILER_SERVER_COMPUTESOURCEFILE_H
#define PROFILER_SERVER_COMPUTESOURCEFILE_H
#include <string>
#include <string_view>
namespace Dic::Module::Source::Test {
// Source-file path used by the fixtures in this header. It is the same text as
// the "Source" field of the API_FILE / API_FILE_WITH_DTYPE JSON documents.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view SOURCE_NAME = "/test/vec_add1_simt.cpp";
// Core identifier used by the fixtures in this header. It is the same text as
// the first entry of the "Cores" array in the JSON documents defined below.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view CORE_NAME = "core0.veccore0";
// File name of a binary test artefact (relative, no directory component).
// NOTE(review): presumably the file the "query api line" handler test creates
// or reads - confirm against the test that uses this constant.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view BIN_FILE_PATH_API_LINE = "query_api_line_handler_test.bin";
// File name of a second binary test artefact (relative, no directory component).
// NOTE(review): presumably used by the "dynamic" variant of the query-api-line
// handler test - confirm against the test that uses this constant.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view BIN_FILE_PATH_API_LINE_DYNAMIC = "query_api_line_dynamic_handler_test.bin";
// JSON fixture with three top-level keys:
//   "Cores"       - two core names,
//   "Files Dtype" - an integer code for each field of a line record,
//   "Files"       - one file entry ("/test/vec_add1_simt.cpp") holding eight
//                   line records (Address Range, Cycles, Instructions Executed,
//                   Line).
// The "Cores" and "Files" sections carry the same data as API_FILE; only the
// "Files Dtype" section is additional.
// NOTE(review): the meaning of the integer dtype codes (0 / 1) is defined by
// the parser under test and is not visible in this header.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view API_FILE_WITH_DTYPE = R"({
"Cores" : ["core0.veccore0", "core0.veccore1"],
"Files Dtype": {
"Lines": {
"Address Range": 0,
"Cycles": 1,
"Instructions Executed": 1,
"Line": 1
}
},
"Files" : [{
"Lines" : [{
"Address Range" : [["0x1134e2d8", "0x1134e4d8"], ["0x1134e138", "0x1134e138"]],
"Cycles" : [56, 56],
"Instructions Executed" : [8, 8],
"Line" : 31
}, {
"Address Range" : [["0x1134e0f0", "0x1134e0f0"], ["0x1134e0f8", "0x1134e0f8"]],
"Cycles" : [284, 284],
"Instructions Executed" : [36, 36],
"Line" : 32
}, {
"Address Range" : [["0x1134e158", "0x1134e158"], ["0x1134e160", "0x1134e160"]],
"Cycles" : [7729, 1984],
"Instructions Executed" : [208, 208],
"Line" : 33
}, {
"Address Range" : [["0x1134e0e8", "0x1134e0e8"], ["0x1134e0f0", "0x1134e0f0"]],
"Cycles" : [8145, 2400],
"Instructions Executed" : [260, 260],
"Line" : 41
}, {
"Address Range" : [["0x1134e048", "0x1134e0b0"], ["0x1134e0b8", "0x1134e0b8"]],
"Cycles" : [2670, 1609],
"Instructions Executed" : [33, 33],
"Line" : 42
}, {
"Address Range" : [["0x1134e000", "0x1134e044"]],
"Cycles" : [1694, 1442],
"Instructions Executed" : [18, 18],
"Line" : 43
}, {
"Address Range" : [["0x1134e048", "0x1134e0b0"], ["0x1134e0b8", "0x1134e0b8"]],
"Cycles" : [2670, 1609],
"Instructions Executed" : [33, 33],
"Line" : 46
}, {
"Address Range" : [["0x1134e0e0", "0x1134e0e0"]],
"Cycles" : [0, 0],
"Instructions Executed" : [1, 1],
"Line" : 56
}
],
"Source" : "/test/vec_add1_simt.cpp"
}
]
})";
// JSON fixture with three top-level keys:
//   "Cores"              - two core names,
//   "Instructions Dtype" - an integer code for each field of an instruction
//                          record,
//   "Instructions"       - ten instruction records (Address, AscendC Inner
//                          Code, Cycles, Instructions Executed, Pipe,
//                          RealStallCycles, Source, TheoreticalStallCycles).
// The "Cores" and "Instructions" sections carry the same data as INSTR_FILE;
// only the "Instructions Dtype" section is additional.
// NOTE(review): the meaning of the integer dtype codes (1 / 3) is defined by
// the parser under test and is not visible in this header.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view INSTR_FILE_WITH_DTYPE = R"({
"Cores" : ["core0.veccore0", "core0.veccore1"],
"Instructions Dtype": {
"Instructions": {
"Address": 3,
"AscendC Inner Code": 3,
"Cycles": 1,
"Instructions Executed": 1,
"Pipe": 3,
"TheoreticalStallCycles": 1,
"Source": 3,
"RealStallCycles": 1
}
},
"Instructions" : [{
"Address" : "0x1134e2d8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 42],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [13, 22],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:0|R],[#ofst:9],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2d0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [360, 44],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECST",
"RealStallCycles" : [36, 42],
"Source" : "SIMT_STG [PEX:6|P],[Rn:6|R],[Rn1:7|R],[Rs:3|R],[#ofst:8],[btype:2],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [28, 28]
}, {
"Address" : "0x1134e2c8",
"AscendC Inner Code" : "/test/compiler/tikcpp/tikcfw/interface/kernel_operator_simt_float_intrinsics.h:104",
"Cycles" : [32, 32],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECEX",
"RealStallCycles" : [75, 55],
"Source" : "SIMT_IADD [PEX:6|P],[Rm:3|R],[Rn:8|R],[Rd:3|R],[waitBitMask:3],[stallCyc:7],[yeild:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2c0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [63, 43],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [8, 10],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:4|R],[Rn1:5|R],[Rd:8|R],[#ofst:8],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2f8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:59",
"Cycles" : [36, 36],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLP",
"RealStallCycles" : [16, 16],
"Source" : "SIMT_END [PEX:7|P],[waitBitMask:3],[stallCyc:a],[yeild:0],[inv:0],[warpId:0],[schId:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2b8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 41],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [21, 25],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:3|R],[#ofst:8],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2a0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [63, 43],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [8, 10],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:4|R],[Rn1:5|R],[Rd:8|R],[#ofst:7],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e298",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 41],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [21, 25],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:3|R],[#ofst:7],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e290",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [452, 44],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECST",
"RealStallCycles" : [36, 42],
"Source" : "SIMT_STG [PEX:6|P],[Rn:6|R],[Rn1:7|R],[Rs:3|R],[#ofst:6],[btype:2],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [28, 28]
}, {
"Address" : "0x1134e288",
"AscendC Inner Code" : "/test/compiler/tikcpp/tikcfw/interface/kernel_operator_simt_float_intrinsics.h:104",
"Cycles" : [32, 32],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECEX",
"RealStallCycles" : [75, 55],
"Source" : "SIMT_IADD [PEX:6|P],[Rm:3|R],[Rn:8|R],[Rd:3|R],[waitBitMask:3],[stallCyc:7],[yeild:0],[inv:0]",
"TheoreticalStallCycles" : [8, 8]
}
]
})";
// Text of a kernel source file used as fixture data. The text declares the
// template class simt_add::KernelAdd (Init / SimtCompute / Process) and the
// extern "C" entry point vec_add1. It is never compiled as part of this
// header; it is only string content, so every character of it - including the
// non-English comments inside the literal - is runtime data and must stay
// unchanged.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view SOURCE_FILE = R"(/*
* Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
*/
#include "kernel_operator_simt.h"
#include "kernel_operator_simt_float_intrinsics.h"
using namespace AscendC;
namespace simt_add {
#define THREAD_DIM 1024
template <typename T>
class KernelAdd {
public:
__aicore__ KernelAdd() {}
public: __aicore__ inline void Init(GM_ADDR out, GM_ADDR src0, GM_ADDR src1, const int size);
__simt_callee__ inline __aicore__ void SimtCompute() const;
__aicore__ inline void Process();
private:
AscendC::GlobalTensor<T> outGm;
AscendC::GlobalTensor<T> src0Gm;
AscendC::GlobalTensor<T> src1Gm;
int size;
};
template <typename T>
__aicore__ inline void KernelAdd<T>::Init(GM_ADDR out, GM_ADDR src0, GM_ADDR src1,
const int size)
{
outGm.SetGlobalBuffer((__gm__ T*)(out));
src0Gm.SetGlobalBuffer((__gm__ T*)(src0));
src1Gm.SetGlobalBuffer((__gm__ T*)(src1));
this->size = size;
}
template <typename T>
__simt_callee__ inline __aicore__ void KernelAdd<T>::SimtCompute() const
{
// simt 代码
auto dst = outGm.address_;
auto src0 = src0Gm.address_;
auto src1 = src1Gm.address_;
for (int idx = block_idx * 32768; idx < GetThreadIdx<0>() * 8192 + block_idx * 32768 + 10; idx++)
{
dst[idx] = Add(src0[idx], src1[idx]);
}
}
template <typename T>
__aicore__ inline void KernelAdd<T>::Process()
{
// 使用lambda 封装simt_vf simt_vf函数SimtCompute
auto simt_func = [=, *this]() { SimtCompute(); };
// 启动SIMT_VF
ParallelEXE({128}, simt_func);
}
extern "C" __global__ __aicore__ void vec_add1(GM_ADDR src0, GM_ADDR src1, GM_ADDR out, GM_ADDR gm_tiling)
{
simt_add::KernelAdd<DType> op;
op.Init(out, src0, src1, 32768);
op.Process();
}
})";
// JSON fixture with two top-level keys:
//   "Cores" - two core names,
//   "Files" - one file entry ("/test/vec_add1_simt.cpp") holding eight line
//             records (Address Range, Cycles, Instructions Executed, Line).
// Same data as API_FILE_WITH_DTYPE, but without the "Files Dtype" section.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view API_FILE = R"({
"Cores" : ["core0.veccore0", "core0.veccore1"],
"Files" : [{
"Lines" : [{
"Address Range" : [["0x1134e2d8", "0x1134e4d8"], ["0x1134e138", "0x1134e138"]],
"Cycles" : [56, 56],
"Instructions Executed" : [8, 8],
"Line" : 31
}, {
"Address Range" : [["0x1134e0f0", "0x1134e0f0"], ["0x1134e0f8", "0x1134e0f8"]],
"Cycles" : [284, 284],
"Instructions Executed" : [36, 36],
"Line" : 32
}, {
"Address Range" : [["0x1134e158", "0x1134e158"], ["0x1134e160", "0x1134e160"]],
"Cycles" : [7729, 1984],
"Instructions Executed" : [208, 208],
"Line" : 33
}, {
"Address Range" : [["0x1134e0e8", "0x1134e0e8"], ["0x1134e0f0", "0x1134e0f0"]],
"Cycles" : [8145, 2400],
"Instructions Executed" : [260, 260],
"Line" : 41
}, {
"Address Range" : [["0x1134e048", "0x1134e0b0"], ["0x1134e0b8", "0x1134e0b8"]],
"Cycles" : [2670, 1609],
"Instructions Executed" : [33, 33],
"Line" : 42
}, {
"Address Range" : [["0x1134e000", "0x1134e044"]],
"Cycles" : [1694, 1442],
"Instructions Executed" : [18, 18],
"Line" : 43
}, {
"Address Range" : [["0x1134e048", "0x1134e0b0"], ["0x1134e0b8", "0x1134e0b8"]],
"Cycles" : [2670, 1609],
"Instructions Executed" : [33, 33],
"Line" : 46
}, {
"Address Range" : [["0x1134e0e0", "0x1134e0e0"]],
"Cycles" : [0, 0],
"Instructions Executed" : [1, 1],
"Line" : 56
}
],
"Source" : "/test/vec_add1_simt.cpp"
}
]
})";
// JSON fixture with two top-level keys:
//   "Cores"        - two core names,
//   "Instructions" - ten instruction records (Address, AscendC Inner Code,
//                    Cycles, Instructions Executed, Pipe, RealStallCycles,
//                    Source, TheoreticalStallCycles).
// Same data as INSTR_FILE_WITH_DTYPE, but without the "Instructions Dtype"
// section.
// Declared inline constexpr so that the header contributes a single,
// compile-time-initialised definition instead of one internal copy per
// translation unit.
inline constexpr std::string_view INSTR_FILE = R"({
"Cores" : ["core0.veccore0", "core0.veccore1"],
"Instructions" : [{
"Address" : "0x1134e2d8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 42],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [13, 22],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:0|R],[#ofst:9],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2d0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [360, 44],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECST",
"RealStallCycles" : [36, 42],
"Source" : "SIMT_STG [PEX:6|P],[Rn:6|R],[Rn1:7|R],[Rs:3|R],[#ofst:8],[btype:2],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [28, 28]
}, {
"Address" : "0x1134e2c8",
"AscendC Inner Code" : "/test/compiler/tikcpp/tikcfw/interface/kernel_operator_simt_float_intrinsics.h:104",
"Cycles" : [32, 32],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECEX",
"RealStallCycles" : [75, 55],
"Source" : "SIMT_IADD [PEX:6|P],[Rm:3|R],[Rn:8|R],[Rd:3|R],[waitBitMask:3],[stallCyc:7],[yeild:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2c0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [63, 43],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [8, 10],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:4|R],[Rn1:5|R],[Rd:8|R],[#ofst:8],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2f8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:59",
"Cycles" : [36, 36],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLP",
"RealStallCycles" : [16, 16],
"Source" : "SIMT_END [PEX:7|P],[waitBitMask:3],[stallCyc:a],[yeild:0],[inv:0],[warpId:0],[schId:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2b8",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 41],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [21, 25],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:3|R],[#ofst:8],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e2a0",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [63, 43],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [8, 10],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:4|R],[Rn1:5|R],[Rd:8|R],[#ofst:7],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e298",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [62, 41],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECLD",
"RealStallCycles" : [21, 25],
"Source" : "SIMT_LDG [PEX:6|P],[Rn:0|R],[Rn1:1|R],[Rd:3|R],[#ofst:7],[cop:1],[l2_cache_hint:0],[rscb_id:7]",
"TheoreticalStallCycles" : [8, 8]
}, {
"Address" : "0x1134e290",
"AscendC Inner Code" : "/test/vec_add1_simt.cpp:50",
"Cycles" : [452, 44],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECST",
"RealStallCycles" : [36, 42],
"Source" : "SIMT_STG [PEX:6|P],[Rn:6|R],[Rn1:7|R],[Rs:3|R],[#ofst:6],[btype:2],[cop:1],[l2_cache_hint:0]",
"TheoreticalStallCycles" : [28, 28]
}, {
"Address" : "0x1134e288",
"AscendC Inner Code" : "/test/compiler/tikcpp/tikcfw/interface/kernel_operator_simt_float_intrinsics.h:104",
"Cycles" : [32, 32],
"Instructions Executed" : [4, 4],
"Pipe" : "RVECEX",
"RealStallCycles" : [75, 55],
"Source" : "SIMT_IADD [PEX:6|P],[Rm:3|R],[Rn:8|R],[Rd:3|R],[waitBitMask:3],[stallCyc:7],[yeild:0],[inv:0]",
"TheoreticalStallCycles" : [8, 8]
}
]
})";
}
#endif