{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"sys.path.append(\"../..\")\n",
"\n",
"from prettytable import PrettyTable, ALL\n",
"from textwrap import fill\n",
"from msprof_analyze.advisor.interface.interface import Interface"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
"profiling_path = r\"YOUR PROFILING PATH\"\n",
"interface = Interface(profiling_path=profiling_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Block Dim问题识别\n",
"\n",
"Block Dim问题主要为识别相关core算子AI core核未打满或者Vector 核未打满问题,主要调优手段为AOE调优,由于AOE调优依赖静态shape,所以当所有算子都为动态shape时,将不会检测相关Block Dim问题.\n",
"\n",
"下列代码为样例,主要展示如何检测Block Dim类型问题,并获取相关问题检测结果:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查询computation相关是否存在block dim问题\n",
"# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
"block_dim_result = interface.get_result(\"computation\", \"block_dim_analysis\", cann_version=\"7.0.RC1\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>category</th>\n",
" <th>description</th>\n",
" <th>suggestion</th>\n",
" <th>problem count</th>\n",
" <th>total_time(us)</th>\n",
" <th>time ratio</th>\n",
" <th>income(us)</th>\n",
" <th>income ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>AICore核数</td>\n",
" <td>一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill</td>\n",
" <td></td>\n",
" <td>2935</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.1078</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
"| category | description | suggestion | problem count | total_time(us) | time ratio | income(us) | income ratio |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
"| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, | | 2935 | 2245970.2129999925 | 0.1078 | | |\n",
"| | GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill | | | | | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"problems = block_dim_result.get(\"问题综述\")\n",
"if problems: # 如果存在相关问题则获取相关问题检测描述及建议\n",
" problem_table = PrettyTable(problems.get(\"headers\"))\n",
" for row in problems.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" problem_table.add_row(row)\n",
" \n",
" problem_table.align = \"l\"\n",
" problem_table.hrules = ALL\n",
" display(problem_table)\n",
"else:\n",
" print(\"There is no suggestion related to block dim.\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>op_name</th>\n",
" <th>op_type</th>\n",
" <th>task_type</th>\n",
" <th>task_duration</th>\n",
" <th>income</th>\n",
" <th>block_dim</th>\n",
" <th>mix_block_dim</th>\n",
" <th>input_shapes</th>\n",
" <th>input_data_types</th>\n",
" <th>input_formats</th>\n",
" <th>output_shapes</th>\n",
" <th>output_data_types</th>\n",
" <th>output_formats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>aclnnMatmul_TransData_TransData</td>\n",
" <td>TransData</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>4868.317</td>\n",
" <td>0</td>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>"8192,155136"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FORMAT_ND</td>\n",
" <td>"9696,512,16,16"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FRACTAL_NZ</td>\n",
" </tr>\n",
" <tr>\n",
" <td>aclnnMatmul_TransData_TransData</td>\n",
" <td>TransData</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>4838.857</td>\n",
" <td>0</td>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>"8192,155136"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FORMAT_ND</td>\n",
" <td>"9696,512,16,16"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FRACTAL_NZ</td>\n",
" </tr>\n",
" <tr>\n",
" <td>aclnnMatmul_TransData_TransData</td>\n",
" <td>TransData</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>4798.156</td>\n",
" <td>0</td>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>"8192,155136"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FORMAT_ND</td>\n",
" <td>"9696,512,16,16"</td>\n",
" <td>DT_BF16</td>\n",
" <td>FRACTAL_NZ</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
"| op_name | op_type | task_type | task_duration | income | block_dim | mix_block_dim | input_shapes | input_data_types | input_formats | output_shapes | output_data_types | output_formats |\n",
"+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
"| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE | 4868.317 | 0 | 26 | 0 | \"8192,155136\" | DT_BF16 | FORMAT_ND | \"9696,512,16,16\" | DT_BF16 | FRACTAL_NZ |\n",
"+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
"| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE | 4838.857 | 0 | 26 | 0 | \"8192,155136\" | DT_BF16 | FORMAT_ND | \"9696,512,16,16\" | DT_BF16 | FRACTAL_NZ |\n",
"+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
"| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE | 4798.156 | 0 | 26 | 0 | \"8192,155136\" | DT_BF16 | FORMAT_ND | \"9696,512,16,16\" | DT_BF16 | FRACTAL_NZ |\n",
"+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if problems: # 如果存在相关问题则获取相关问题检测细节\n",
" block_dim = block_dim_result.get(\"AICore核数\")\n",
" block_dim_table = PrettyTable(block_dim.get(\"headers\"))\n",
" for row in block_dim.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" block_dim_table.add_row(row)\n",
"\n",
" block_dim_table.hrules = ALL\n",
" display(block_dim_table[:3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Operator No Bound问题识别\n",
"Operator No Bound问题主要为识别相关算子无mte, cube, vector, scalar相关bound问题,主要调优手段为AOE调优,由于AOE调优依赖静态shape,所以当所有算子都为动态shape时,将不会检测相关Operator No Bound问题.\n",
"\n",
"下列代码为样例,主要展示如何检测Operator No Bound类型问题,并获取相关问题检测结果:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from prettytable import PrettyTable, ALL\n",
"from textwrap import fill\n",
"from msprof_analyze.advisor.interface.interface import Interface\n",
"\n",
"\n",
"# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
"profiling_path = r\"YOUR PROFILING PATH\"\n",
"interface = Interface(profiling_path=profiling_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查询computation相关是否存在operator no bound问题\n",
"# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
"operator_no_bound_result = interface.get_result(\"computation\", \"operator_no_bound_analysis\", cann_version=\"7.0.RC1\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>category</th>\n",
" <th>description</th>\n",
" <th>suggestion</th>\n",
" <th>problem count</th>\n",
" <th>total_time(us)</th>\n",
" <th>time ratio</th>\n",
" <th>income(us)</th>\n",
" <th>income ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>AICore核数</td>\n",
" <td>一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill</td>\n",
" <td></td>\n",
" <td>2935</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.1078</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <td>算子瓶颈</td>\n",
" <td>mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub, Mul,<br>ApplyAdamW, Cast, Add, TransData, TensorMove, ArgMaxWithValue</td>\n",
" <td></td>\n",
" <td>6612</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.3826</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
"| category | description | suggestion | problem count | total_time(us) | time ratio | income(us) | income ratio |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
"| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, | | 2935 | 2245970.2129999925 | 0.1078 | | |\n",
"| | GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill | | | | | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
"| 算子瓶颈 | mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub, Mul, | | 6612 | 2245970.2129999925 | 0.3826 | | |\n",
"| | ApplyAdamW, Cast, Add, TransData, TensorMove, ArgMaxWithValue | | | | | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"problems = operator_no_bound_result.get(\"问题综述\")\n",
"problem_table = PrettyTable(problems.get(\"headers\"))\n",
"if problems: # 如果存在相关问题则获取相关问题检测描述及建议\n",
" for row in problems.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" problem_table.add_row(row)\n",
"\n",
" problem_table.align = \"l\"\n",
" problem_table.hrules = ALL\n",
" display(problem_table)\n",
"else:\n",
" print(\"There is no suggestion related to operator no bound.\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>op_name</th>\n",
" <th>op_type</th>\n",
" <th>task_type</th>\n",
" <th>task_duration</th>\n",
" <th>vec_ratio</th>\n",
" <th>mac_ratio</th>\n",
" <th>scalar_ratio</th>\n",
" <th>mte1_ratio</th>\n",
" <th>mte2_ratio</th>\n",
" <th>mte3_ratio</th>\n",
" <th>block_dim</th>\n",
" <th>input_shapes</th>\n",
" <th>input_data_types</th>\n",
" <th>input_formats</th>\n",
" <th>output_shapes</th>\n",
" <th>output_data_types</th>\n",
" <th>output_formats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>aclnnDivs_RealDivAiCore_RealDiv</td>\n",
" <td>RealDiv</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>28468.789</td>\n",
" <td>0.095</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.4</td>\n",
" <td>48</td>\n",
" <td>"87351296;"</td>\n",
" <td>FLOAT;FLOAT</td>\n",
" <td>FORMAT_ND;FORMAT_ND</td>\n",
" <td>"87351296"</td>\n",
" <td>FLOAT</td>\n",
" <td>FORMAT_ND</td>\n",
" </tr>\n",
" <tr>\n",
" <td>aclnnExp_ExpAiCore_Exp</td>\n",
" <td>Exp</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>8166.943</td>\n",
" <td>0.06</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.505</td>\n",
" <td>48</td>\n",
" <td>"8165,1,155136"</td>\n",
" <td>FLOAT</td>\n",
" <td>FORMAT_NCL</td>\n",
" <td>"8165,1,155136"</td>\n",
" <td>FLOAT</td>\n",
" <td>FORMAT_ND</td>\n",
" </tr>\n",
" <tr>\n",
" <td>aclnnExp_ExpAiCore_Exp</td>\n",
" <td>Exp</td>\n",
" <td>AI_VECTOR_CORE</td>\n",
" <td>8154.463</td>\n",
" <td>0.06</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.509</td>\n",
" <td>48</td>\n",
" <td>"8168,1,155136"</td>\n",
" <td>FLOAT</td>\n",
" <td>FORMAT_NCL</td>\n",
" <td>"8168,1,155136"</td>\n",
" <td>FLOAT</td>\n",
" <td>FORMAT_ND</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
"| op_name | op_type | task_type | task_duration | vec_ratio | mac_ratio | scalar_ratio | mte1_ratio | mte2_ratio | mte3_ratio | block_dim | input_shapes | input_data_types | input_formats | output_shapes | output_data_types | output_formats |\n",
"+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
"| aclnnDivs_RealDivAiCore_RealDiv | RealDiv | AI_VECTOR_CORE | 28468.789 | 0.095 | 0.0 | 0.0 | 0.0 | 0.0 | 0.4 | 48 | \"87351296;\" | FLOAT;FLOAT | FORMAT_ND;FORMAT_ND | \"87351296\" | FLOAT | FORMAT_ND |\n",
"+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
"| aclnnExp_ExpAiCore_Exp | Exp | AI_VECTOR_CORE | 8166.943 | 0.06 | 0.0 | 0.0 | 0.0 | 0.0 | 0.505 | 48 | \"8165,1,155136\" | FLOAT | FORMAT_NCL | \"8165,1,155136\" | FLOAT | FORMAT_ND |\n",
"+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
"| aclnnExp_ExpAiCore_Exp | Exp | AI_VECTOR_CORE | 8154.463 | 0.06 | 0.0 | 0.0 | 0.0 | 0.0 | 0.509 | 48 | \"8168,1,155136\" | FLOAT | FORMAT_NCL | \"8168,1,155136\" | FLOAT | FORMAT_ND |\n",
"+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if problems: # 如果存在相关问题则获取相关问题检测细节\n",
" operator_no_bound = operator_no_bound_result.get(\"算子瓶颈\")\n",
" operator_no_bound_table = PrettyTable(operator_no_bound.get(\"headers\"))\n",
" for row in operator_no_bound.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" operator_no_bound_table.add_row(row)\n",
" operator_no_bound_table.hrules = ALL\n",
" display(operator_no_bound_table[:3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### AICPU问题识别\n",
"AICPU问题主要为识别相关算子执行时跑到AICPU上计算,并没有利用到AI CORE的计算能力的场景,主要调优手段为修改相关代码来避免AICPU算子,可参见相关资料,来避免AICPU算子的问题:\n",
"https://gitcode.com/Ascend/msprof-analyze/blob/master/docs/zh/aicpu_operator_replacement_example.md\n",
"\n",
"下列代码为样例,主要展示如何检测Dynamic Shape类型问题,并获取相关问题检测结果:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from prettytable import PrettyTable, ALL\n",
"from textwrap import fill\n",
"from msprof_analyze.advisor.interface.interface import Interface\n",
"\n",
"\n",
"# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
"profiling_path = r\"YOUR PROFILING PATH\"\n",
"interface = Interface(profiling_path=profiling_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查询computation相关是否存在aicpu问题\n",
"# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
"aicpu_result = interface.get_result(\"computation\", \"aicpu_analysis\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>category</th>\n",
" <th>description</th>\n",
" <th>suggestion</th>\n",
" <th>problem count</th>\n",
" <th>total_time(us)</th>\n",
" <th>time ratio</th>\n",
" <th>income(us)</th>\n",
" <th>income ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>AICore核数</td>\n",
" <td>一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill</td>\n",
" <td></td>\n",
" <td>2935</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.1078</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <td>算子瓶颈</td>\n",
" <td>mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub, Mul,<br>ApplyAdamW, Cast, Add, TransData, TensorMove, ArgMaxWithValue</td>\n",
" <td></td>\n",
" <td>6612</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.3826</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <td>AICPU算子</td>\n",
" <td>一些算子和任务执行时间超过了20us,比如: Min, Max, Bincount, Equal</td>\n",
" <td>1. 修改代码避免使用aicpu类算子 2. 尝试将double类型的算子转换成float,比如aclnnEqScalar_EqualAiCpu_Equal</td>\n",
" <td>100</td>\n",
" <td>2245970.2129999925</td>\n",
" <td>0.0054</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
"| category | description | suggestion | problem count | total_time(us) | time ratio | income(us) | income ratio |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
"| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核; 任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, | | 2935 | 2245970.2129999925 | 0.1078 | | |\n",
"| | GroupedMatmul, ConcatD, BroadcastTo, Tile, MatMulV2, Mul, Cast, Fill | | | | | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
"| 算子瓶颈 | mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub, Mul, | | 6612 | 2245970.2129999925 | 0.3826 | | |\n",
"| | ApplyAdamW, Cast, Add, TransData, TensorMove, ArgMaxWithValue | | | | | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
"| AICPU算子 | 一些算子和任务执行时间超过了20us,比如: Min, Max, Bincount, Equal | 1. 修改代码避免使用aicpu类算子 2. 尝试将double类型的算子转换成float,比如aclnnEqScalar_EqualAiCpu_Equal | 100 | 2245970.2129999925 | 0.0054 | | |\n",
"+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"problems = aicpu_result.get(\"问题综述\")\n",
"if problems: # 如果存在相关问题则获取相关问题检测描述及建议\n",
" problem_table = PrettyTable(problems.get(\"headers\"))\n",
" for row in problems.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" problem_table.add_row(row)\n",
"\n",
" problem_table.align = \"l\"\n",
" problem_table.hrules = ALL\n",
" display(problem_table)\n",
"else:\n",
" print(\"There is no suggestion related to operator no bound.\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>op_name</th>\n",
" <th>op_type</th>\n",
" <th>task_duration</th>\n",
" <th>input_shapes</th>\n",
" <th>input_data_types</th>\n",
" <th>input_formats</th>\n",
" <th>output_shapes</th>\n",
" <th>output_data_types</th>\n",
" <th>output_formats</th>\n",
" <th>stack_info</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>aclnnBincount_BincountAiCpu_Bincount</td>\n",
" <td>Bincount</td>\n",
" <td>363.167</td>\n",
" <td>"20512;;20512"</td>\n",
" <td>INT32;INT32;INT64</td>\n",
" <td>FORMAT_ND;FORMAT_ND;FORMAT_ND</td>\n",
" <td>"32"</td>\n",
" <td>INT64</td>\n",
" <td>FORMAT_ND</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;<br>/opt/tiger/janus/janus/megatron/gate.py(383): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(470):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" </tr>\n",
" <tr>\n",
" <td>aclnnBincount_BincountAiCpu_Bincount</td>\n",
" <td>Bincount</td>\n",
" <td>339.527</td>\n",
" <td>"20512;;20512"</td>\n",
" <td>INT32;INT32;INT64</td>\n",
" <td>FORMAT_ND;FORMAT_ND;FORMAT_ND</td>\n",
" <td>"32"</td>\n",
" <td>INT64</td>\n",
" <td>FORMAT_ND</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;<br>/opt/tiger/janus/janus/megatron/gate.py(383): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
"| op_name | op_type | task_duration | input_shapes | input_data_types | input_formats | output_shapes | output_data_types | output_formats | stack_info |\n",
"+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
"| aclnnBincount_BincountAiCpu_Bincount | Bincount | 363.167 | \"20512;;20512\" | INT32;INT32;INT64 | FORMAT_ND;FORMAT_ND;FORMAT_ND | \"32\" | INT64 | FORMAT_ND | /opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/gate.py(383): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): |\n",
"| | | | | | | | | | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/schedules.py(470): |\n",
"| | | | | | | | | | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): |\n",
"| | | | | | | | | | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): |\n",
"| | | | | | | | | | _train_one_step_forward; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; |\n",
"| | | | | | | | | | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): |\n",
"| | | | | | | | | | wrapper; tasks/gpt2/unsup/model.py(1939): <module> |\n",
"+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
"| aclnnBincount_BincountAiCpu_Bincount | Bincount | 339.527 | \"20512;;20512\" | INT32;INT32;INT64 | FORMAT_ND;FORMAT_ND;FORMAT_ND | \"32\" | INT64 | FORMAT_ND | /opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/gate.py(383): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; |\n",
"| | | | | | | | | | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): |\n",
"| | | | | | | | | | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; |\n",
"| | | | | | | | | | /opt/tiger/Megatron-LM/megatron/schedules.py(461): |\n",
"| | | | | | | | | | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): |\n",
"| | | | | | | | | | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; |\n",
"| | | | | | | | | | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): |\n",
"| | | | | | | | | | _call_impl; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): |\n",
"| | | | | | | | | | _train_one_step_forward; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; |\n",
"| | | | | | | | | | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; |\n",
"| | | | | | | | | | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- |\n",
"| | | | | | | | | | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): |\n",
"| | | | | | | | | | wrapper; tasks/gpt2/unsup/model.py(1939): <module> |\n",
"+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if problems: # 如果存在相关问题则获取相关问题检测细节\n",
" aicpu = aicpu_result.get(\"AICPU算子\")\n",
" aicpu_table = PrettyTable(aicpu.get(\"headers\"))\n",
" for row in aicpu.get(\"data\"):\n",
" row = [fill(str(element), width=80) for element in row]\n",
" aicpu_table.add_row(row)\n",
" aicpu_table.hrules = ALL\n",
" display(aicpu_table[:2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}