{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append(\"../..\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from prettytable import PrettyTable, ALL\n",
"from textwrap import fill\n",
"from msprof_analyze.advisor.interface.interface import Interface"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"profiling_path = \"YOUR PROFILING PATH\"\n",
"interface = Interface(profiling_path=profiling_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 融合算子API识别"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"指定profiling路径后,可以自动识别其中包含的融合算子并给出对应的torch_npu api和需要修改的代码堆栈。基于给定堆栈可以快速定位到需要修改的代码段,替换torch_npu api后,能够减少pytorch侧的小算子的下发,进而提升模型训练速度。"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"timeline_fusion_ops_result = interface.get_result(\"schedule\", \"timeline_fusion_ops\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>category</th>\n",
" <th>description</th>\n",
" <th>suggestion</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>亲和API接口</td>\n",
" <td>目前运行环境版本为cann-8.0.0和torch-2.1.0,发现有2个api接口可以替换。</td>\n",
" <td>1. 请根据子表'Affinity training api'替换训练api接口</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-------------+-----------------------------------------------------------------------+-----------------------------------------------------+\n",
"| category | description | suggestion |\n",
"+-------------+-----------------------------------------------------------------------+-----------------------------------------------------+\n",
"| 亲和API接口 | 目前运行环境版本为cann-8.0.0和torch-2.1.0,发现有2个api接口可以替换。 | 1. 请根据子表'Affinity training api'替换训练api接口 |\n",
"+-------------+-----------------------------------------------------------------------+-----------------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_column_num = 3\n",
"problems = timeline_fusion_ops_result.get(\"问题综述\")\n",
"problem_table = PrettyTable(problems.get(\"headers\")[:display_column_num])\n",
"for row in problems.get(\"data\"):\n",
" for i in range(len(row)):\n",
" row[i] = fill(str(row[i]), width=80)\n",
" problem_table.add_row(row[:display_column_num])\n",
"\n",
"display(problem_table)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"如下所示,存在亲和优化器和梯度裁剪两个可替换的torch_npu api,并给出了具体的堆栈。"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>Affinity API</th>\n",
" <th>Code stacks</th>\n",
" <th>Stack called counts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2665): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(35):<br>forward; /usr/local/lib/python3.8/site-<br>packages/torch/autograd/function.py(539): apply;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90):<br>_npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2404): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2442): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(312): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(401): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886):<br>linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron-<br>LM/megatron/core/tensor_parallel/layers.py(1345): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(1005): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/transformer.py(709): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(307): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(310): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/embedding.py(316): generate_pos_embs;<br>/opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(782): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/embedding.py(313): generate_pos_embs;<br>/opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(782): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2352): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(144): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(116): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(229): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886):<br>linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron-<br>LM/megatron/model/language_model.py(38): parallel_lm_logits;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(534): lm_logits;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(486): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(249): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(23):<br>forward; /usr/local/lib/python3.8/site-<br>packages/torch/autograd/function.py(539): apply;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90):<br>_npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/activate_offload.py(118): h2d_;<br>/opt/tiger/Megatron-LM/megatron/activate_offload.py(147): h2d;<br>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(152): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2450): native_scatter_backward;<br>/opt/tiger/janus/janus/megatron/experts.py(2453): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(862): split;<br>/opt/tiger/janus/janus/megatron/experts.py(2462): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2390): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2404): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.optim.NpuFusedAdamW</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/profiler.py(631):<br>__enter__; /usr/local/lib/python3.8/site-<br>packages/torch/optim/optimizer.py(361): wrapper; /opt/tiger/Megatron-<br>LM/megatron/optimizer/optimizer.py(570): step; /usr/local/lib/python3.8/site-<br>packages/torch/utils/_contextlib.py(115): decorate_context; /opt/tiger/Megatron-<br>LM/megatron/optimizer/distrib_optimizer.py(880): step;<br>/usr/local/lib/python3.8/site-packages/torch/utils/_contextlib.py(115):<br>decorate_context; tasks/gpt2/unsup/model.py(1719): optimizer_step;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1559):<br>_train_one_step_optimize;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1817): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2665): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(35):<br>forward; /usr/local/lib/python3.8/site-<br>packages/torch/autograd/function.py(539): apply;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90):<br>_npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2404): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__;<br>/opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2442): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(312): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(401): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886):<br>linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron-<br>LM/megatron/core/tensor_parallel/layers.py(1345): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(1005): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/transformer.py(709): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(307): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(310): calc_metric;<br>/opt/tiger/janus/janus/megatron/gate.py(392): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/embedding.py(316): generate_pos_embs;<br>/opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(782): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/mariana/mariana/models/layers/embedding.py(313): generate_pos_embs;<br>/opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/attention.py(782): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2352): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(144): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(116): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(229): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886):<br>linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron-<br>LM/megatron/model/language_model.py(38): parallel_lm_logits;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(534): lm_logits;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(486): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/gate.py(249): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(23):<br>forward; /usr/local/lib/python3.8/site-<br>packages/torch/autograd/function.py(539): apply;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90):<br>_npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/Megatron-LM/megatron/activate_offload.py(118): h2d_;<br>/opt/tiger/Megatron-LM/megatron/activate_offload.py(147): h2d;<br>/opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(152): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/opt/tiger/janus/janus/megatron/experts.py(2450): native_scatter_backward;<br>/opt/tiger/janus/janus/megatron/experts.py(2453): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_tensor.py(862): split;<br>/opt/tiger/janus/janus/megatron/experts.py(2462): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.npu_confusion_transpose</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94):<br>npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2390): native_moe_forward;<br>/opt/tiger/janus/janus/megatron/experts.py(2404): forward;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply;<br>/opt/tiger/janus/janus/megatron/experts.py(2330): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_;<br>/opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/moe.py(468): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward;<br>/opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/transformer.py(509): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/module.py(286): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881):<br>forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step;<br>/opt/tiger/Megatron-LM/megatron/schedules.py(461):<br>forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961):<br>_megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step;<br>/opt/tiger/cruise/cruise/module/wrapper.py(28): forward;<br>/usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527):<br>_call_impl; /usr/local/lib/python3.8/site-<br>packages/torch/nn/modules/module.py(1518): _wrapped_call_impl;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1451):<br>_train_one_step_forward;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>torch_npu.optim.NpuFusedAdamW</td>\n",
" <td>/usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__;<br>/usr/local/lib/python3.8/site-packages/torch/autograd/profiler.py(631):<br>__enter__; /usr/local/lib/python3.8/site-<br>packages/torch/optim/optimizer.py(361): wrapper; /opt/tiger/Megatron-<br>LM/megatron/optimizer/optimizer.py(570): step; /usr/local/lib/python3.8/site-<br>packages/torch/utils/_contextlib.py(115): decorate_context;<br>/opt/tiger/Megatron-LM/megatron/optimizer/distrib_optimizer.py(880): step;<br>/usr/local/lib/python3.8/site-packages/torch/utils/_contextlib.py(115):<br>decorate_context; tasks/gpt2/unsup/model.py(1719): optimizer_step;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1559):<br>_train_one_step_optimize;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(1817): _train_one_epoch;<br>/opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit;<br>tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site-<br>packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346):<br>wrapper; tasks/gpt2/unsup/model.py(1939): <module></td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| Affinity API | Code stacks | Stack called counts |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 4 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2665): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(35): | 3 |\n",
"| | forward; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/autograd/function.py(539): apply; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90): | |\n",
"| | _npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2404): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2442): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(312): calc_metric; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(401): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886): | |\n",
"| | linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/core/tensor_parallel/layers.py(1345): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(1005): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/transformer.py(709): _forward; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(307): calc_metric; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(310): calc_metric; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/embedding.py(316): generate_pos_embs; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(782): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/embedding.py(313): generate_pos_embs; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(782): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2352): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(144): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(116): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(229): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | 1 |\n",
"| | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886): | |\n",
"| | linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/model/language_model.py(38): parallel_lm_logits; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(534): lm_logits; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(486): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(249): backward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(23): | 1 |\n",
"| | forward; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/autograd/function.py(539): apply; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90): | |\n",
"| | _npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/activate_offload.py(118): h2d_; | 1 |\n",
"| | /opt/tiger/Megatron-LM/megatron/activate_offload.py(147): h2d; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(152): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2450): native_scatter_backward; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2453): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(862): split; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2462): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2390): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2404): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.optim.NpuFusedAdamW | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/profiler.py(631): | |\n",
"| | __enter__; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/optim/optimizer.py(361): wrapper; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/optimizer/optimizer.py(570): step; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/utils/_contextlib.py(115): decorate_context; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/optimizer/distrib_optimizer.py(880): step; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/utils/_contextlib.py(115): | |\n",
"| | decorate_context; tasks/gpt2/unsup/model.py(1719): optimizer_step; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1559): | |\n",
"| | _train_one_step_optimize; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1817): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 4 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2665): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(35): | 3 |\n",
"| | forward; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/autograd/function.py(539): apply; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90): | |\n",
"| | _npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2404): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(1000): __iter__; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2389): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2442): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(312): calc_metric; | 2 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(401): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886): | |\n",
"| | linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/core/tensor_parallel/layers.py(1345): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(1005): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/transformer.py(709): _forward; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(307): calc_metric; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(310): calc_metric; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/gate.py(392): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(405): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/embedding.py(316): generate_pos_embs; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(782): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/mariana/mariana/models/layers/embedding.py(313): generate_pos_embs; | 1 |\n",
"| | /opt/tiger/mariana/mariana/models/layers/embedding.py(380): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/attention.py(782): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(680): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2352): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(144): forward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(116): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(229): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | 1 |\n",
"| | /opt/tiger/Megatron-LM/megatron/core/tensor_parallel/layers.py(886): | |\n",
"| | linear_with_grad_accumulation_and_async_allreduce; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/model/language_model.py(38): parallel_lm_logits; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(534): lm_logits; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(486): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/gate.py(249): backward; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(23): | 1 |\n",
"| | forward; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/autograd/function.py(539): apply; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(90): | |\n",
"| | _npu_gmm; /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/Megatron-LM/megatron/activate_offload.py(118): h2d_; | 1 |\n",
"| | /opt/tiger/Megatron-LM/megatron/activate_offload.py(147): h2d; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/fused_layer_norm.py(152): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2667): backward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(288): apply | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /opt/tiger/janus/janus/megatron/experts.py(2450): native_scatter_backward; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2453): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_tensor.py(862): split; | 1 |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2462): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.npu_confusion_transpose | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /home/tiger/.local/lib/python3.8/site-packages/mindspeed/ops/gmm.py(94): | |\n",
"| | npu_gmm; /opt/tiger/janus/janus/megatron/experts.py(2390): native_moe_forward; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2404): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/function.py(539): apply; | |\n",
"| | /opt/tiger/janus/janus/megatron/experts.py(2330): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(418): forward_; | |\n",
"| | /opt/tiger/janus/janus/megatron/lego_moe_layer.py(477): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/moe.py(468): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(735): _forward; | |\n",
"| | /opt/tiger/mariana/mariana/models/layers/transformer.py(334): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/transformer.py(509): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(204): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/mariana/mariana/models/text/gpt2_megatron.py(445): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/module.py(286): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/model/distributed.py(230): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | tasks/gpt2/unsup/model.py(615): forward; tasks/gpt2/unsup/model.py(881): | |\n",
"| | forward_step; /opt/tiger/Megatron-LM/megatron/schedules.py(305): forward_step; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/schedules.py(461): | |\n",
"| | forward_backward_no_pipelining; tasks/gpt2/unsup/model.py(961): | |\n",
"| | _megatron_fwd_bwd_function; tasks/gpt2/unsup/model.py(1246): training_step; | |\n",
"| | /opt/tiger/cruise/cruise/module/wrapper.py(28): forward; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/nn/modules/module.py(1527): | |\n",
"| | _call_impl; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/nn/modules/module.py(1518): _wrapped_call_impl; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1451): | |\n",
"| | _train_one_step_forward; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1771): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+\n",
"| torch_npu.optim.NpuFusedAdamW | /usr/local/lib/python3.8/site-packages/torch/_ops.py(692): __call__; | 1 |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/autograd/profiler.py(631): | |\n",
"| | __enter__; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/optim/optimizer.py(361): wrapper; /opt/tiger/Megatron- | |\n",
"| | LM/megatron/optimizer/optimizer.py(570): step; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/utils/_contextlib.py(115): decorate_context; | |\n",
"| | /opt/tiger/Megatron-LM/megatron/optimizer/distrib_optimizer.py(880): step; | |\n",
"| | /usr/local/lib/python3.8/site-packages/torch/utils/_contextlib.py(115): | |\n",
"| | decorate_context; tasks/gpt2/unsup/model.py(1719): optimizer_step; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1559): | |\n",
"| | _train_one_step_optimize; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(1817): _train_one_epoch; | |\n",
"| | /opt/tiger/cruise/cruise/trainer/common_trainer.py(920): fit; | |\n",
"| | tasks/gpt2/unsup/model.py(1935): main; /usr/local/lib/python3.8/site- | |\n",
"| | packages/torch/distributed/elastic/multiprocessing/errors/__init__.py(346): | |\n",
"| | wrapper; tasks/gpt2/unsup/model.py(1939): <module> | |\n",
"+-----------------------------------+----------------------------------------------------------------------------------+---------------------+"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fusion_ops_api = timeline_fusion_ops_result.get(\"亲和API接口\")\n",
"if fusion_ops_api:\n",
" fusion_ops_api_table = PrettyTable(fusion_ops_api.get(\"headers\"))\n",
"\n",
" for row in fusion_ops_api.get(\"data\"):\n",
" for i in range(len(row)):\n",
" row[i] = fill(str(row[i]), width=80)\n",
" fusion_ops_api_table.add_row(row)\n",
"\n",
" fusion_ops_api_table.hrules = ALL\n",
" display(fusion_ops_api_table)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}