msprobe/test/msprobe_test/resources/layer_mapping/pytorch/stack.json-代码预览-MindStudio-Probe:基于昇腾的全场景精度调试工具链项目 - AtomGit

c366fffe创建于 2025年11月14日历史提交
{
    "Tensor.__add__.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py, line 61, in tensor_op_template, \n return TensorOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/utils.py, line 176, in get_ltor_reset_masks_and_position_ids, \n attention_mask[b, 0, (i + 1):, :(i + 1)] = 0",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 174, in get_batch, \n attention_mask, position_ids = get_ltor_reset_masks_and_position_ids(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 243, in forward_step, \n tokens, labels, loss_mask, attention_mask, position_ids = get_batch(",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Tensor.__add__.1.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py, line 61, in tensor_op_template, \n return TensorOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/utils.py, line 176, in get_ltor_reset_masks_and_position_ids, \n attention_mask[b, 0, (i + 1):, :(i + 1)] = 0",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 174, in get_batch, \n attention_mask, position_ids = get_ltor_reset_masks_and_position_ids(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 243, in forward_step, \n tokens, labels, loss_mask, attention_mask, position_ids = get_batch(",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Tensor.__or__.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py, line 61, in tensor_op_template, \n return TensorOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/tensor_parallel/layers.py, line 19, in vocab_parallel_embedding_forward, \n input_mask = (input_ < self.vocab_start_index) | \\",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 217, in forward, \n words_embeddings = self.word_embeddings(input_ids)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 473, in forward, \n encoder_input = self.embedding(enc_input_ids, enc_position_ids,",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Distributed.all_reduce.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py, line 68, in distributed_op_template, \n return DistributedOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/tensor_parallel/mappings.py, line 24, in _reduce, \n torch.distributed.all_reduce(input_, group=get_tensor_model_parallel_group())",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/tensor_parallel/mappings.py, line 223, in forward, \n return _reduce(input_)",
        "File /path_to_package/site-packages/torch/autograd/function.py, line 539, in apply, \n return super().apply(*args, **kwargs)  # type: ignore[misc]",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/tensor_parallel/mappings.py, line 436, in reduce_from_tensor_model_parallel_region, \n return _ReduceFromModelParallelRegion.apply(input_)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/tensor_parallel/layers.py, line 35, in vocab_parallel_embedding_forward, \n output = reduce_from_tensor_model_parallel_region(output_parallel)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 217, in forward, \n words_embeddings = self.word_embeddings(input_ids)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 473, in forward, \n encoder_input = self.embedding(enc_input_ids, enc_position_ids,",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Module.module.module.language_model.embedding.word_embeddings.VocabParallelEmbedding.forward.0": [
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 217, in forward, \n words_embeddings = self.word_embeddings(input_ids)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 473, in forward, \n encoder_input = self.embedding(enc_input_ids, enc_position_ids,",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "NPU.npu_rms_norm.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py, line 78, in npu_op_template, \n return NpuOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/fusions/rms_norm.py, line 26, in wrapper, \n return torch_npu.npu_rms_norm(x, self.weight, epsilon=self.eps)[0]",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1194, in forward, \n norm_output = self.input_norm(hidden_states)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/transformer/transformer.py, line 21, in row_parallel_forward, \n output = forward_func(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Module.module.module.language_model.encoder.layers.0.input_norm.RMSNorm.forward.0": [
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1194, in forward, \n norm_output = self.input_norm(hidden_states)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/transformer/transformer.py, line 21, in row_parallel_forward, \n output = forward_func(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Module.module.module.language_model.encoder.layers.0.self_attention.ParallelAttention.forward.0": [
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1198, in forward, \n self.self_attention(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/transformer/transformer.py, line 21, in row_parallel_forward, \n output = forward_func(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Module.module.module.language_model.encoder.layers.0.ParallelTransformerLayer.forward.0": [
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "Torch.cos.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py, line 76, in torch_op_template, \n return TorchOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/fusions/rotary_pos_embedding.py, line 16, in wrapper, \n cos_ = torch.cos(freqs).to(t.dtype)",
        "File /path_to_net/PanGu/pangu/core/fusions/rotary_pos_embedding.py, line 13, in wrapper, \n t = fn(t, freqs, rotary_interleaved)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/models/common/embeddings/rotary_pos_embedding.py, line 313, in apply_rotary_pos_emb, \n return apply_rotary_pos_emb_bshd(t, freqs, rotary_interleaved=config.rotary_interleaved)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 738, in parallel_attention_forward, \n query_layer = apply_rotary_pos_emb(query_layer, q_pos_emb, self.config)",
        "File /path_to_net/PanGu/pangu/model/transformer.py, line 97, in wrapper, \n return fn(self, hidden_states, attention_mask,",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1198, in forward, \n self.self_attention(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/transformer/transformer.py, line 21, in row_parallel_forward, \n output = forward_func(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ],
    "NPU.npu_fusion_attention.0.forward": [
        "File /path_to_package/mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py, line 78, in npu_op_template, \n return NpuOPTemplate(op_name, hook)(*args, **kwargs)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 525, in flash_self_attention_forward, \n output = torch_npu.npu_fusion_attention(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 757, in parallel_attention_forward, \n context_layer = self.core_attention_flash(query_layer, key_layer, value_layer, attention_mask)",
        "File /path_to_net/PanGu/pangu/model/transformer.py, line 97, in wrapper, \n return fn(self, hidden_states, attention_mask,",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1198, in forward, \n self.self_attention(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/core/transformer/transformer.py, line 21, in row_parallel_forward, \n output = forward_func(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/transformer.py, line 1832, in forward, \n hidden_states = layer(",
        "File /path_to_package/third_party/MindSpeed/mindspeed/model/transformer.py, line 349, in wrapper, \n return fn(self, hidden_states, attention_mask, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/language_model.py, line 500, in forward, \n encoder_output = self.encoder(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/gpt_model.py, line 86, in forward, \n lm_output = self.language_model(",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/legacy/model/module.py, line 190, in forward, \n outputs = self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/distributed/distributed_data_parallel.py, line 179, in forward, \n return self.module(*inputs, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
        "File /path_to_package/site-packages/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 247, in forward_step, \n output_tensor = model(tokens, position_ids, attention_mask,",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 193, in forward_step, \n output_tensor, loss_func = forward_step_func(data_iterator, model)",
        "File /path_to_net/third_party/Megatron-LM/megatron/core/pipeline_parallel/schedules.py, line 1225, in forward_backward_pipelining_without_interleaving, \n output_tensor = forward_step(",
        "File /path_to_net/third_party/Megatron-LM/megatron/training/training.py, line 624, in train_step, \n losses_reduced = forward_backward_func(",
        "File /path_to_net/PanGu/pangu/training/auto_parallel_wrapper.py, line 34, in wrapper, \n ret = train_step(*args, **kwargs)",
        "File /path_to_net/PanGu/pangu/training/training.py, line 495, in train, \n train_step(forward_step_func,",
        "File /path_to_net/PanGu/pangu/training/training.py, line 303, in pretrain, \n iteration, num_floating_point_operations_so_far = train(",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 372, in main, \n pretrain(train_valid_test_datasets_provider,",
        "File /path_to_net/PanGu/pretrain_gpt.py, line 392, in <module>, \n main()"
    ]
}