{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "sys.path.append(\"../..\")\n",
    "\n",
    "from prettytable import PrettyTable, ALL\n",
    "from textwrap import fill\n",
    "from msprof_analyze.advisor.interface.interface import Interface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
    "profiling_path = r\"YOUR PROFILING PATH\"\n",
    "interface = Interface(profiling_path=profiling_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Block Dim问题识别\n",
    "\n",
    "Block Dim问题主要为识别相关core算子AI core核未打满或者Vector 核未打满问题,主要调优手段为AOE调优,由于AOE调优依赖静态shape,所以当所有算子都为动态shape时,将不会检测相关Block Dim问题.\n",
    "\n",
    "下列代码为样例,主要展示如何检测Block Dim类型问题,并获取相关问题检测结果:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查询computation相关是否存在block dim问题\n",
    "# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
    "block_dim_result = interface.get_result(\"computation\", \"block_dim_analysis\", cann_version=\"7.0.RC1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>category</th>\n",
       "            <th>description</th>\n",
       "            <th>suggestion</th>\n",
       "            <th>problem count</th>\n",
       "            <th>total_time(us)</th>\n",
       "            <th>time ratio</th>\n",
       "            <th>income(us)</th>\n",
       "            <th>income ratio</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>AICore核数</td>\n",
       "            <td>一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill</td>\n",
       "            <td></td>\n",
       "            <td>2935</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.1078</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
       "| category   | description                                                                                                  | suggestion | problem count | total_time(us)     | time ratio | income(us) | income ratio |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
       "| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, |            | 2935          | 2245970.2129999925 | 0.1078     |            |              |\n",
       "|            | GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill                                      |            |               |                    |            |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Overall summary returned by the block dim analysis.\n",
    "problems = block_dim_result.get(\"问题综述\")\n",
    "if not problems:\n",
    "    print(\"There is no suggestion related to block dim.\")\n",
    "else:\n",
    "    problem_table = PrettyTable(problems.get(\"headers\"))\n",
    "    for cells in problems.get(\"data\"):\n",
    "        # Wrap long text at 80 characters so wide columns stay readable.\n",
    "        problem_table.add_row([fill(str(cell), width=80) for cell in cells])\n",
    "\n",
    "    problem_table.align = \"l\"\n",
    "    problem_table.hrules = ALL\n",
    "    display(problem_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>op_name</th>\n",
       "            <th>op_type</th>\n",
       "            <th>task_type</th>\n",
       "            <th>task_duration</th>\n",
       "            <th>income</th>\n",
       "            <th>block_dim</th>\n",
       "            <th>mix_block_dim</th>\n",
       "            <th>input_shapes</th>\n",
       "            <th>input_data_types</th>\n",
       "            <th>input_formats</th>\n",
       "            <th>output_shapes</th>\n",
       "            <th>output_data_types</th>\n",
       "            <th>output_formats</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>aclnnMatmul_TransData_TransData</td>\n",
       "            <td>TransData</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>4868.317</td>\n",
       "            <td>0</td>\n",
       "            <td>26</td>\n",
       "            <td>0</td>\n",
       "            <td>&quot;8192,155136&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "            <td>&quot;9696,512,16,16&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FRACTAL_NZ</td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>aclnnMatmul_TransData_TransData</td>\n",
       "            <td>TransData</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>4838.857</td>\n",
       "            <td>0</td>\n",
       "            <td>26</td>\n",
       "            <td>0</td>\n",
       "            <td>&quot;8192,155136&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "            <td>&quot;9696,512,16,16&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FRACTAL_NZ</td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>aclnnMatmul_TransData_TransData</td>\n",
       "            <td>TransData</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>4798.156</td>\n",
       "            <td>0</td>\n",
       "            <td>26</td>\n",
       "            <td>0</td>\n",
       "            <td>&quot;8192,155136&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "            <td>&quot;9696,512,16,16&quot;</td>\n",
       "            <td>DT_BF16</td>\n",
       "            <td>FRACTAL_NZ</td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
       "|             op_name             |  op_type  |   task_type    | task_duration | income | block_dim | mix_block_dim |  input_shapes | input_data_types | input_formats |  output_shapes   | output_data_types | output_formats |\n",
       "+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
       "| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE |    4868.317   |   0    |     26    |       0       | \"8192,155136\" |     DT_BF16      |   FORMAT_ND   | \"9696,512,16,16\" |      DT_BF16      |   FRACTAL_NZ   |\n",
       "+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
       "| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE |    4838.857   |   0    |     26    |       0       | \"8192,155136\" |     DT_BF16      |   FORMAT_ND   | \"9696,512,16,16\" |      DT_BF16      |   FRACTAL_NZ   |\n",
       "+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+\n",
       "| aclnnMatmul_TransData_TransData | TransData | AI_VECTOR_CORE |    4798.156   |   0    |     26    |       0       | \"8192,155136\" |     DT_BF16      |   FORMAT_ND   | \"9696,512,16,16\" |      DT_BF16      |   FRACTAL_NZ   |\n",
       "+---------------------------------+-----------+----------------+---------------+--------+-----------+---------------+---------------+------------------+---------------+------------------+-------------------+----------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Per-operator details for the block dim finding. A non-empty summary does not\n",
    "# guarantee that this category is present, so guard on the detail entry as well\n",
    "# instead of calling .get(...) on a possibly missing value.\n",
    "block_dim = block_dim_result.get(\"AICore核数\")\n",
    "if problems and block_dim:\n",
    "    block_dim_table = PrettyTable(block_dim.get(\"headers\"))\n",
    "    for row in block_dim.get(\"data\"):\n",
    "        # Wrap long text at 80 characters so wide columns stay readable.\n",
    "        row = [fill(str(element), width=80) for element in row]\n",
    "        block_dim_table.add_row(row)\n",
    "\n",
    "    block_dim_table.hrules = ALL\n",
    "    # Show only the first three rows to keep the output short.\n",
    "    display(block_dim_table[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Operator No Bound问题识别\n",
    "Operator No Bound问题主要为识别相关算子无mte, cube, vector, scalar相关bound问题,主要调优手段为AOE调优,由于AOE调优依赖静态shape,所以当所有算子都为动态shape时,将不会检测相关Operator No Bound问题.\n",
    "\n",
    "下列代码为样例,主要展示如何检测Operator No Bound类型问题,并获取相关问题检测结果:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from prettytable import PrettyTable, ALL\n",
    "from textwrap import fill\n",
    "from msprof_analyze.advisor.interface.interface import Interface\n",
    "\n",
    "\n",
    "# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
    "profiling_path = r\"YOUR PROFILING PATH\"\n",
    "interface = Interface(profiling_path=profiling_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查询computation相关是否存在operator no bound问题\n",
    "# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
    "operator_no_bound_result = interface.get_result(\"computation\", \"operator_no_bound_analysis\", cann_version=\"7.0.RC1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>category</th>\n",
       "            <th>description</th>\n",
       "            <th>suggestion</th>\n",
       "            <th>problem count</th>\n",
       "            <th>total_time(us)</th>\n",
       "            <th>time ratio</th>\n",
       "            <th>income(us)</th>\n",
       "            <th>income ratio</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>AICore核数</td>\n",
       "            <td>一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill</td>\n",
       "            <td></td>\n",
       "            <td>2935</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.1078</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>算子瓶颈</td>\n",
       "            <td>mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub,  Mul,<br>ApplyAdamW, Cast,  Add, TransData, TensorMove,  ArgMaxWithValue</td>\n",
       "            <td></td>\n",
       "            <td>6612</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.3826</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
       "| category   | description                                                                                                  | suggestion | problem count | total_time(us)     | time ratio | income(us) | income ratio |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
       "| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, |            | 2935          | 2245970.2129999925 | 0.1078     |            |              |\n",
       "|            | GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill                                      |            |               |                    |            |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+\n",
       "| 算子瓶颈   | mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub,  Mul,    |            | 6612          | 2245970.2129999925 | 0.3826     |            |              |\n",
       "|            | ApplyAdamW, Cast,  Add, TransData, TensorMove,  ArgMaxWithValue                                              |            |               |                    |            |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+------------+---------------+--------------------+------------+------------+--------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "problems = operator_no_bound_result.get(\"问题综述\")\n",
    "if problems:  # Build and show the summary table only when something was reported\n",
    "    # Create the table inside the guard: calling problems.get(...) before the\n",
    "    # check would raise AttributeError when the lookup above returns None.\n",
    "    problem_table = PrettyTable(problems.get(\"headers\"))\n",
    "    for row in problems.get(\"data\"):\n",
    "        # Wrap long text at 80 characters so wide columns stay readable.\n",
    "        row = [fill(str(element), width=80) for element in row]\n",
    "        problem_table.add_row(row)\n",
    "\n",
    "    problem_table.align = \"l\"\n",
    "    problem_table.hrules = ALL\n",
    "    display(problem_table)\n",
    "else:\n",
    "    print(\"There is no suggestion related to operator no bound.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>op_name</th>\n",
       "            <th>op_type</th>\n",
       "            <th>task_type</th>\n",
       "            <th>task_duration</th>\n",
       "            <th>vec_ratio</th>\n",
       "            <th>mac_ratio</th>\n",
       "            <th>scalar_ratio</th>\n",
       "            <th>mte1_ratio</th>\n",
       "            <th>mte2_ratio</th>\n",
       "            <th>mte3_ratio</th>\n",
       "            <th>block_dim</th>\n",
       "            <th>input_shapes</th>\n",
       "            <th>input_data_types</th>\n",
       "            <th>input_formats</th>\n",
       "            <th>output_shapes</th>\n",
       "            <th>output_data_types</th>\n",
       "            <th>output_formats</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>aclnnDivs_RealDivAiCore_RealDiv</td>\n",
       "            <td>RealDiv</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>28468.789</td>\n",
       "            <td>0.095</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.4</td>\n",
       "            <td>48</td>\n",
       "            <td>&quot;87351296;&quot;</td>\n",
       "            <td>FLOAT;FLOAT</td>\n",
       "            <td>FORMAT_ND;FORMAT_ND</td>\n",
       "            <td>&quot;87351296&quot;</td>\n",
       "            <td>FLOAT</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>aclnnExp_ExpAiCore_Exp</td>\n",
       "            <td>Exp</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>8166.943</td>\n",
       "            <td>0.06</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.505</td>\n",
       "            <td>48</td>\n",
       "            <td>&quot;8165,1,155136&quot;</td>\n",
       "            <td>FLOAT</td>\n",
       "            <td>FORMAT_NCL</td>\n",
       "            <td>&quot;8165,1,155136&quot;</td>\n",
       "            <td>FLOAT</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>aclnnExp_ExpAiCore_Exp</td>\n",
       "            <td>Exp</td>\n",
       "            <td>AI_VECTOR_CORE</td>\n",
       "            <td>8154.463</td>\n",
       "            <td>0.06</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.0</td>\n",
       "            <td>0.509</td>\n",
       "            <td>48</td>\n",
       "            <td>&quot;8168,1,155136&quot;</td>\n",
       "            <td>FLOAT</td>\n",
       "            <td>FORMAT_NCL</td>\n",
       "            <td>&quot;8168,1,155136&quot;</td>\n",
       "            <td>FLOAT</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
       "|             op_name             | op_type |   task_type    | task_duration | vec_ratio | mac_ratio | scalar_ratio | mte1_ratio | mte2_ratio | mte3_ratio | block_dim |   input_shapes  | input_data_types |    input_formats    |  output_shapes  | output_data_types | output_formats |\n",
       "+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
       "| aclnnDivs_RealDivAiCore_RealDiv | RealDiv | AI_VECTOR_CORE |   28468.789   |   0.095   |    0.0    |     0.0      |    0.0     |    0.0     |    0.4     |     48    |   \"87351296;\"   |   FLOAT;FLOAT    | FORMAT_ND;FORMAT_ND |    \"87351296\"   |       FLOAT       |   FORMAT_ND    |\n",
       "+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
       "|      aclnnExp_ExpAiCore_Exp     |   Exp   | AI_VECTOR_CORE |    8166.943   |    0.06   |    0.0    |     0.0      |    0.0     |    0.0     |   0.505    |     48    | \"8165,1,155136\" |      FLOAT       |      FORMAT_NCL     | \"8165,1,155136\" |       FLOAT       |   FORMAT_ND    |\n",
       "+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+\n",
       "|      aclnnExp_ExpAiCore_Exp     |   Exp   | AI_VECTOR_CORE |    8154.463   |    0.06   |    0.0    |     0.0      |    0.0     |    0.0     |   0.509    |     48    | \"8168,1,155136\" |      FLOAT       |      FORMAT_NCL     | \"8168,1,155136\" |       FLOAT       |   FORMAT_ND    |\n",
       "+---------------------------------+---------+----------------+---------------+-----------+-----------+--------------+------------+------------+------------+-----------+-----------------+------------------+---------------------+-----------------+-------------------+----------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Per-operator details for the operator-no-bound finding. A non-empty summary\n",
    "# does not guarantee that this category is present, so guard on the detail entry\n",
    "# as well instead of calling .get(...) on a possibly missing value.\n",
    "operator_no_bound = operator_no_bound_result.get(\"算子瓶颈\")\n",
    "if problems and operator_no_bound:\n",
    "    operator_no_bound_table = PrettyTable(operator_no_bound.get(\"headers\"))\n",
    "    for row in operator_no_bound.get(\"data\"):\n",
    "        # Wrap long text at 80 characters so wide columns stay readable.\n",
    "        row = [fill(str(element), width=80) for element in row]\n",
    "        operator_no_bound_table.add_row(row)\n",
    "    operator_no_bound_table.hrules = ALL\n",
    "    # Show only the first three rows to keep the output short.\n",
    "    display(operator_no_bound_table[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### AICPU问题识别\n",
    "AICPU问题主要为识别相关算子执行时跑到AICPU上计算,并没有利用到AI CORE的计算能力的场景,主要调优手段为修改相关代码来避免AICPU算子,可参见相关资料,来避免AICPU算子的问题:\n",
    "https://gitcode.com/Ascend/msprof-analyze/blob/master/docs/zh/aicpu_operator_replacement_example.md\n",
    "\n",
    "下列代码为样例,主要展示如何检测AICPU类型问题,并获取相关问题检测结果:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from prettytable import PrettyTable, ALL\n",
    "from textwrap import fill\n",
    "from msprof_analyze.advisor.interface.interface import Interface\n",
    "\n",
    "\n",
    "# 配置profiling采集出来的数据,需要指定到的profiling目录是同一个工具采集的,并且需要采集l0级别以上\n",
    "profiling_path = r\"YOUR PROFILING PATH\"\n",
    "interface = Interface(profiling_path=profiling_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查询computation相关是否存在aicpu问题\n",
    "# 如果profiling数据采集自非8.0.RC1的CANN版本,需要在训练/推理环境中执行: 'cat CANN安装目录/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info'命令查看version\n",
    "aicpu_result = interface.get_result(\"computation\", \"aicpu_analysis\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>category</th>\n",
       "            <th>description</th>\n",
       "            <th>suggestion</th>\n",
       "            <th>problem count</th>\n",
       "            <th>total_time(us)</th>\n",
       "            <th>time ratio</th>\n",
       "            <th>income(us)</th>\n",
       "            <th>income ratio</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>AICore核数</td>\n",
       "            <td>一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue,<br>GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill</td>\n",
       "            <td></td>\n",
       "            <td>2935</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.1078</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>算子瓶颈</td>\n",
       "            <td>mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub,  Mul,<br>ApplyAdamW, Cast,  Add, TransData, TensorMove,  ArgMaxWithValue</td>\n",
       "            <td></td>\n",
       "            <td>6612</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.3826</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>AICPU算子</td>\n",
       "            <td>一些算子和任务执行时间超过了20us,比如: Min, Max, Bincount,  Equal</td>\n",
       "            <td>1. 修改代码避免使用aicpu类算子 2. 尝试将double类型的算子转换成float,比如aclnnEqScalar_EqualAiCpu_Equal</td>\n",
       "            <td>100</td>\n",
       "            <td>2245970.2129999925</td>\n",
       "            <td>0.0054</td>\n",
       "            <td></td>\n",
       "            <td></td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
       "| category   | description                                                                                                  | suggestion                                                                                              | problem count | total_time(us)     | time ratio | income(us) | income ratio |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
       "| AICore核数 | 一些算子没有充分利用24个AICore核或者48个AIVector核;  任务耗时最长的10个算子如下:TransData, ArgMaxWithValue, |                                                                                                         | 2935          | 2245970.2129999925 | 0.1078     |            |              |\n",
       "|            | GroupedMatmul,  ConcatD, BroadcastTo, Tile,  MatMulV2, Mul, Cast,  Fill                                      |                                                                                                         |               |                    |            |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
       "| 算子瓶颈   | mte,cube,vetor,scalar比都没有超过 80.00%,需要调整的任务执行时间最长的算子如下: RealDiv, Exp, Sub,  Mul,    |                                                                                                         | 6612          | 2245970.2129999925 | 0.3826     |            |              |\n",
       "|            | ApplyAdamW, Cast,  Add, TransData, TensorMove,  ArgMaxWithValue                                              |                                                                                                         |               |                    |            |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+\n",
       "| AICPU算子  | 一些算子和任务执行时间超过了20us,比如: Min, Max, Bincount,  Equal                                          | 1. 修改代码避免使用aicpu类算子 2. 尝试将double类型的算子转换成float,比如aclnnEqScalar_EqualAiCpu_Equal | 100           | 2245970.2129999925 | 0.0054     |            |              |\n",
       "+------------+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+---------------+--------------------+------------+------------+--------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "problems = aicpu_result.get(\"问题综述\")\n",
    "if problems:  # Build and show the summary table only when something was reported\n",
    "    problem_table = PrettyTable(problems.get(\"headers\"))\n",
    "    for row in problems.get(\"data\"):\n",
    "        # Wrap long text at 80 characters so wide columns stay readable.\n",
    "        row = [fill(str(element), width=80) for element in row]\n",
    "        problem_table.add_row(row)\n",
    "\n",
    "    problem_table.align = \"l\"\n",
    "    problem_table.hrules = ALL\n",
    "    display(problem_table)\n",
    "else:\n",
    "    # This cell reports the AICPU analysis, so the message names AICPU\n",
    "    # (it previously repeated the operator-no-bound wording).\n",
    "    print(\"There is no suggestion related to aicpu.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>op_name</th>\n",
       "            <th>op_type</th>\n",
       "            <th>task_duration</th>\n",
       "            <th>input_shapes</th>\n",
       "            <th>input_data_types</th>\n",
       "            <th>input_formats</th>\n",
       "            <th>output_shapes</th>\n",
       "            <th>output_data_types</th>\n",
       "            <th>output_formats</th>\n",
       "            <th>stack_info</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>aclnnBincount_BincountAiCpu_Bincount</td>\n",
       "            <td>Bincount</td>\n",
       "            <td>363.167</td>\n",
       "            <td>&quot;20512;;20512&quot;</td>\n",
       "            <td>INT32;INT32;INT64</td>\n",
       "            <td>FORMAT_ND;FORMAT_ND;FORMAT_ND</td>\n",
       "            <td>&quot;32&quot;</td>\n",
       "            <td>INT64</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "            <td>/opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;<br>/opt/tiger/janus/janus/megatron/gate.py(383): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  
/usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward;  tasks/gpt2/unsup/model.py(881):<br>forward_step;  /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(470):<br>forward_backward_no_pipelining;  tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function;  tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main;  /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper;  tasks/gpt2/unsup/model.py(1939): &lt;module&gt;</td>\n",
       "        </tr>\n",
       "        <tr>\n",
       "            <td>aclnnBincount_BincountAiCpu_Bincount</td>\n",
       "            <td>Bincount</td>\n",
       "            <td>339.527</td>\n",
       "            <td>&quot;20512;;20512&quot;</td>\n",
       "            <td>INT32;INT32;INT64</td>\n",
       "            <td>FORMAT_ND;FORMAT_ND;FORMAT_ND</td>\n",
       "            <td>&quot;32&quot;</td>\n",
       "            <td>INT64</td>\n",
       "            <td>FORMAT_ND</td>\n",
       "            <td>/opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;<br>/opt/tiger/janus/janus/megatron/gate.py(383): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  
/usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward;  tasks/gpt2/unsup/model.py(881):<br>forward_step;  /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining;  tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function;  tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl;  /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main;  /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper;  tasks/gpt2/unsup/model.py(1939): &lt;module&gt;</td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
       "|               op_name                | op_type  | task_duration |  input_shapes  |  input_data_types |         input_formats         | output_shapes | output_data_types | output_formats |                                    stack_info                                   |\n",
       "+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
       "| aclnnBincount_BincountAiCpu_Bincount | Bincount |    363.167    | \"20512;;20512\" | INT32;INT32;INT64 | FORMAT_ND;FORMAT_ND;FORMAT_ND |      \"32\"     |       INT64       |   FORMAT_ND    |      /opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |              /opt/tiger/janus/janus/megatron/gate.py(383): forward;             |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |        /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |         /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |       /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;      |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |       /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;       |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |    tasks/gpt2/unsup/model.py(615): forward;  tasks/gpt2/unsup/model.py(881):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                | forward_step;  /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                /opt/tiger/Megatron-LM/megatron/schedules.py(470):               |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |         forward_backward_no_pipelining;  tasks/gpt2/unsup/model.py(961):        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   _megatron_fwd_bwd_function;  tasks/gpt2/unsup/model.py(1246): training_step;  |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |             /opt/tiger/cruise/cruise/module/wrapper.py(28): forward;            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |            /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                             _train_one_step_forward;                            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;          |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      tasks/gpt2/unsup/model.py(1935): main;  /usr/local/lib/python3.8/site-     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |               wrapper;  tasks/gpt2/unsup/model.py(1939): <module>               |\n",
       "+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+\n",
       "| aclnnBincount_BincountAiCpu_Bincount | Bincount |    339.527    | \"20512;;20512\" | INT32;INT32;INT64 | FORMAT_ND;FORMAT_ND;FORMAT_ND |      \"32\"     |       INT64       |   FORMAT_ND    |      /opt/tiger/janus/janus/megatron/gate.py(332): native_expert_histogram;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |              /opt/tiger/janus/janus/megatron/gate.py(383): forward;             |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |        /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |         /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |       /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;      |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |       /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;       |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |    tasks/gpt2/unsup/model.py(615): forward;  tasks/gpt2/unsup/model.py(881):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                | forward_step;  /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                /opt/tiger/Megatron-LM/megatron/schedules.py(461):               |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |         forward_backward_no_pipelining;  tasks/gpt2/unsup/model.py(961):        |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   _megatron_fwd_bwd_function;  tasks/gpt2/unsup/model.py(1246): training_step;  |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |             /opt/tiger/cruise/cruise/module/wrapper.py(28): forward;            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |     /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):    |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                   _call_impl;  /usr/local/lib/python3.8/site-                   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;         |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |            /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |                             _train_one_step_forward;                            |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |          /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;          |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |      tasks/gpt2/unsup/model.py(1935): main;  /usr/local/lib/python3.8/site-     |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |   packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):   |\n",
       "|                                      |          |               |                |                   |                               |               |                   |                |               wrapper;  tasks/gpt2/unsup/model.py(1939): <module>               |\n",
       "+--------------------------------------+----------+---------------+----------------+-------------------+-------------------------------+---------------+-------------------+----------------+---------------------------------------------------------------------------------+"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "if problems:  # only build the detail table when the analysis reported problems\n",
    "    aicpu_detail = aicpu_result.get(\"AICPU算子\")\n",
    "    aicpu_table = PrettyTable(aicpu_detail.get(\"headers\"))\n",
    "    # Draw a horizontal rule between every row of the rendered table.\n",
    "    aicpu_table.hrules = ALL\n",
    "    for record in aicpu_detail.get(\"data\"):\n",
    "        # Wrap each cell at 80 characters so long values stay readable.\n",
    "        wrapped_cells = [fill(str(cell), width=80) for cell in record]\n",
    "        aicpu_table.add_row(wrapped_cells)\n",
    "    # Show only the first two rows to keep the notebook output short.\n",
    "    display(aicpu_table[:2])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}