9bca5ad9创建于 2023年7月20日历史提交

{
  "schemaVersion": 1,
  
  "computeProperties": [
    
    {
      "id": 0, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34084028416,
      "major": 7, "minor": 0,
      "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048,
      "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32,
      "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304,
      "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304
    },

    {
      "id": 1, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216,
      "major": 7, "minor": 0,
      "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048,
      "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32,
      "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304,
      "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304
    },

    {
      "id": 2, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216,
      "major": 7, "minor": 0,
      "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048,
      "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32,
      "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304,
      "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304
    },

    {
      "id": 3, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216,
      "major": 7, "minor": 0,
      "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048,
      "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32,
      "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304,
      "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304
    }
  ],
  "traceEvents": [
  
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187223197, "dur": 21,
    "args": {
       "Device": 24572, "External id": 2,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187223264, "dur": 5,
    "args": {
       "Device": 24572, "External id": 3,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zeros", "pid": 24572, "tid": "24572",
    "ts": 1621401187223182, "dur": 99,
    "args": {
       "Device": 24572, "External id": 1,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187223376, "dur": 19,
    "args": {
       "Device": 24572, "External id": 5,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187223480, "dur": 18,
    "args": {
       "Device": 24572, "External id": 7,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187223530, "dur": 5,
    "args": {
       "Device": 24572, "External id": 8,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zeros", "pid": 24572, "tid": "24572",
    "ts": 1621401187223469, "dur": 72,
    "args": {
       "Device": 24572, "External id": 6,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187223622, "dur": 19,
    "args": {
       "Device": 24572, "External id": 10,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187223790, "dur": 12,
    "args": {
       "Device": 24572, "External id": 13,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::unsqueeze", "pid": 24572, "tid": "24572",
    "ts": 1621401187223777, "dur": 50,
    "args": {
       "Device": 24572, "External id": 12,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187223850, "dur": 7,
    "args": {
       "Device": 24572, "External id": 15,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::unsqueeze", "pid": 24572, "tid": "24572",
    "ts": 1621401187223841, "dur": 24,
    "args": {
       "Device": 24572, "External id": 14,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187223904, "dur": 16,
    "args": {
       "Device": 24572, "External id": 18,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::resize_", "pid": 24572, "tid": "24572",
    "ts": 1621401187223945, "dur": 14,
    "args": {
       "Device": 24572, "External id": 19,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_cat", "pid": 24572, "tid": "24572",
    "ts": 1621401187223888, "dur": 87,
    "args": {
       "Device": 24572, "External id": 17,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::cat", "pid": 24572, "tid": "24572",
    "ts": 1621401187223876, "dur": 106,
    "args": {
       "Device": 24572, "External id": 16,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::stack", "pid": 24572, "tid": "24572",
    "ts": 1621401187223752, "dur": 245,
    "args": {
       "Device": 24572, "External id": 11,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187224094, "dur": 12,
    "args": {
       "Device": 24572, "External id": 22,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::unsqueeze", "pid": 24572, "tid": "24572",
    "ts": 1621401187224074, "dur": 43,
    "args": {
       "Device": 24572, "External id": 21,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187224137, "dur": 6,
    "args": {
       "Device": 24572, "External id": 24,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::unsqueeze", "pid": 24572, "tid": "24572",
    "ts": 1621401187224128, "dur": 21,
    "args": {
       "Device": 24572, "External id": 23,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187224184, "dur": 15,
    "args": {
       "Device": 24572, "External id": 27,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::resize_", "pid": 24572, "tid": "24572",
    "ts": 1621401187224223, "dur": 12,
    "args": {
       "Device": 24572, "External id": 28,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_cat", "pid": 24572, "tid": "24572",
    "ts": 1621401187224169, "dur": 79,
    "args": {
       "Device": 24572, "External id": 26,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::cat", "pid": 24572, "tid": "24572",
    "ts": 1621401187224159, "dur": 96,
    "args": {
       "Device": 24572, "External id": 25,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::stack", "pid": 24572, "tid": "24572",
    "ts": 1621401187224056, "dur": 213,
    "args": {
       "Device": 24572, "External id": 20,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 24572, "tid": "24572",
    "ts": 1621401187223604, "dur": 725,
    "args": {
       "Device": 24572, "External id": 9,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187224415, "dur": 54,
    "args": {
       "Device": 24572, "External id": 30,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::copy_", "pid": 24572, "tid": "24572",
    "ts": 1621401187224496, "dur": 80,
    "args": {
       "Device": 24572, "External id": 31,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::to", "pid": 24572, "tid": "24572",
    "ts": 1621401187224398, "dur": 193,
    "args": {
       "Device": 24572, "External id": 29,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187224645, "dur": 51,
    "args": {
       "Device": 24572, "External id": 33,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::copy_", "pid": 24572, "tid": "24572",
    "ts": 1621401187224720, "dur": 65,
    "args": {
       "Device": 24572, "External id": 34,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::to", "pid": 24572, "tid": "24572",
    "ts": 1621401187224631, "dur": 168,
    "args": {
       "Device": 24572, "External id": 32,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187224956, "dur": 14,
    "args": {
       "Device": 24572, "External id": 38,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24572",
    "ts": 1621401187224945, "dur": 37,
    "args": {
       "Device": 24572, "External id": 37,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24572",
    "ts": 1621401187224917, "dur": 101,
    "args": {
       "Device": 24572, "External id": 36,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24572",
    "ts": 1621401187225058, "dur": 33,
    "args": {
       "Device": 24572, "External id": 40,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 23
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187225181, "dur": 41,
    "args": {
       "Device": 24572, "External id": 42,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mm", "pid": 24572, "tid": "24572",
    "ts": 1621401187225112, "dur": 197,
    "args": {
       "Device": 24572, "External id": 41,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 23
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24572",
    "ts": 1621401187225367, "dur": 17,
    "args": {
       "Device": 24572, "External id": 44,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572",
    "ts": 1621401187225336, "dur": 79,
    "args": {
       "Device": 24572, "External id": 43,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 24
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::matmul", "pid": 24572, "tid": "24572",
    "ts": 1621401187225037, "dur": 394,
    "args": {
       "Device": 24572, "External id": 39,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 23
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187225449, "dur": 107,
    "args": {
       "Device": 24572, "External id": 45,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 25
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::linear", "pid": 24572, "tid": "24572",
    "ts": 1621401187224907, "dur": 664,
    "args": {
       "Device": 24572, "External id": 35,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187225662, "dur": 25,
    "args": {
       "Device": 24572, "External id": 47,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::resize_", "pid": 24572, "tid": "24572",
    "ts": 1621401187225746, "dur": 30,
    "args": {
       "Device": 24572, "External id": 50,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::clamp_min", "pid": 24572, "tid": "24572",
    "ts": 1621401187225721, "dur": 105,
    "args": {
       "Device": 24572, "External id": 49,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::clamp", "pid": 24572, "tid": "24572",
    "ts": 1621401187225709, "dur": 128,
    "args": {
       "Device": 24572, "External id": 48,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::clamp", "pid": 24572, "tid": "24572",
    "ts": 1621401187225606, "dur": 263,
    "args": {
       "Device": 24572, "External id": 46,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 26
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187225978, "dur": 14,
    "args": {
       "Device": 24572, "External id": 54,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24572",
    "ts": 1621401187225968, "dur": 36,
    "args": {
       "Device": 24572, "External id": 53,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24572",
    "ts": 1621401187225941, "dur": 98,
    "args": {
       "Device": 24572, "External id": 52,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 27
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24572",
    "ts": 1621401187226077, "dur": 60,
    "args": {
       "Device": 24572, "External id": 56,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 28
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187226233, "dur": 41,
    "args": {
       "Device": 24572, "External id": 58,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mm", "pid": 24572, "tid": "24572",
    "ts": 1621401187226161, "dur": 197,
    "args": {
       "Device": 24572, "External id": 57,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 29
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24572",
    "ts": 1621401187226416, "dur": 17,
    "args": {
       "Device": 24572, "External id": 60,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572",
    "ts": 1621401187226384, "dur": 79,
    "args": {
       "Device": 24572, "External id": 59,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 30
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::matmul", "pid": 24572, "tid": "24572",
    "ts": 1621401187226057, "dur": 422,
    "args": {
       "Device": 24572, "External id": 55,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 28
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187226497, "dur": 103,
    "args": {
       "Device": 24572, "External id": 61,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 31
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::linear", "pid": 24572, "tid": "24572",
    "ts": 1621401187225932, "dur": 683,
    "args": {
       "Device": 24572, "External id": 51,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 27
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::broadcast_tensors", "pid": 24572, "tid": "24572",
    "ts": 1621401187226708, "dur": 11,
    "args": {
       "Device": 24572, "External id": 62,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 32
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187226827, "dur": 41,
    "args": {
       "Device": 24572, "External id": 64,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187226955, "dur": 35,
    "args": {
       "Device": 24572, "External id": 66,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187227020, "dur": 11,
    "args": {
       "Device": 24572, "External id": 67,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::sum", "pid": 24572, "tid": "24572",
    "ts": 1621401187226930, "dur": 176,
    "args": {
       "Device": 24572, "External id": 65,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mse_loss", "pid": 24572, "tid": "24572",
    "ts": 1621401187226753, "dur": 445,
    "args": {
       "Device": 24572, "External id": 63,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 32
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187227327, "dur": 21,
    "args": {
       "Device": 24572, "External id": 69,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227368, "dur": 5,
    "args": {
       "Device": 24572, "External id": 70,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zeros", "pid": 24572, "tid": "24572",
    "ts": 1621401187227314, "dur": 65,
    "args": {
       "Device": 24572, "External id": 68,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187227464, "dur": 18,
    "args": {
       "Device": 24572, "External id": 72,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227576, "dur": 49,
    "args": {
       "Device": 24572, "External id": 74,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227553, "dur": 97,
    "args": {
       "Device": 24572, "External id": 73,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 33
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227707, "dur": 43,
    "args": {
       "Device": 24572, "External id": 76,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227689, "dur": 79,
    "args": {
       "Device": 24572, "External id": 75,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 33
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227823, "dur": 42,
    "args": {
       "Device": 24572, "External id": 78,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227805, "dur": 77,
    "args": {
       "Device": 24572, "External id": 77,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 33
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227937, "dur": 41,
    "args": {
       "Device": 24572, "External id": 80,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187227919, "dur": 77,
    "args": {
       "Device": 24572, "External id": 79,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 0, "Sequence number": 33
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "Optimizer.zero_grad#SGD.zero_grad", "pid": 24572, "tid": "24572",
    "ts": 1621401187227446, "dur": 606,
    "args": {
       "Device": 24572, "External id": 71,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty_strided", "pid": 24572, "tid": "24572",
    "ts": 1621401187228150, "dur": 53,
    "args": {
       "Device": 24572, "External id": 83,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty_like", "pid": 24572, "tid": "24572",
    "ts": 1621401187228137, "dur": 81,
    "args": {
       "Device": 24572, "External id": 82,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24572",
    "ts": 1621401187228235, "dur": 50,
    "args": {
       "Device": 24572, "External id": 84,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::ones_like", "pid": 24572, "tid": "24572",
    "ts": 1621401187228128, "dur": 169,
    "args": {
       "Device": 24572, "External id": 81,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187228708, "dur": 79,
    "args": {
       "Device": 24572, "External id": 89,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty_like", "pid": 24572, "tid": "24610",
    "ts": 1621401187228680, "dur": 146,
    "args": {
       "Device": 24572, "External id": 88,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24610",
    "ts": 1621401187228885, "dur": 93,
    "args": {
       "Device": 24572, "External id": 91,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24610",
    "ts": 1621401187228858, "dur": 147,
    "args": {
       "Device": 24572, "External id": 90,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zeros_like", "pid": 24572, "tid": "24610",
    "ts": 1621401187228647, "dur": 369,
    "args": {
       "Device": 24572, "External id": 87,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610",
    "ts": 1621401187229048, "dur": 122,
    "args": {
       "Device": 24572, "External id": 92,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610",
    "ts": 1621401187228603, "dur": 614,
    "args": {
       "Device": 24572, "External id": 86,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "MseLossBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187228516, "dur": 727,
    "args": {
       "Device": 24572, "External id": 85,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 32
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "AddBackward1", "pid": 24572, "tid": "24610",
    "ts": 1621401187229384, "dur": 17,
    "args": {
       "Device": 24572, "External id": 93,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 31
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187229506, "dur": 73,
    "args": {
       "Device": 24572, "External id": 95,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::sum", "pid": 24572, "tid": "24610",
    "ts": 1621401187229459, "dur": 279,
    "args": {
       "Device": 24572, "External id": 94,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24610",
    "ts": 1621401187229788, "dur": 65,
    "args": {
       "Device": 24572, "External id": 96,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24610",
    "ts": 1621401187230059, "dur": 131,
    "args": {
       "Device": 24572, "External id": 98,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610",
    "ts": 1621401187230028, "dur": 228,
    "args": {
       "Device": 24572, "External id": 97,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24610",
    "ts": 1621401187230405, "dur": 61,
    "args": {
       "Device": 24572, "External id": 101,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::reshape", "pid": 24572, "tid": "24610",
    "ts": 1621401187230383, "dur": 107,
    "args": {
       "Device": 24572, "External id": 100,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187230354, "dur": 146,
    "args": {
       "Device": 24572, "External id": 99,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 30
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187230751, "dur": 22,
    "args": {
       "Device": 24572, "External id": 105,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187230732, "dur": 65,
    "args": {
       "Device": 24572, "External id": 104,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187230710, "dur": 124,
    "args": {
       "Device": 24572, "External id": 103,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::conj", "pid": 24572, "tid": "24610",
    "ts": 1621401187230862, "dur": 7,
    "args": {
       "Device": 24572, "External id": 106,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187230935, "dur": 73,
    "args": {
       "Device": 24572, "External id": 108,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mm", "pid": 24572, "tid": "24610",
    "ts": 1621401187230889, "dur": 235,
    "args": {
       "Device": 24572, "External id": 107,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187231211, "dur": 23,
    "args": {
       "Device": 24572, "External id": 111,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187231191, "dur": 69,
    "args": {
       "Device": 24572, "External id": 110,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187231168, "dur": 129,
    "args": {
       "Device": 24572, "External id": 109,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187231376, "dur": 17,
    "args": {
       "Device": 24572, "External id": 114,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187231360, "dur": 49,
    "args": {
       "Device": 24572, "External id": 113,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187231340, "dur": 100,
    "args": {
       "Device": 24572, "External id": 112,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::conj", "pid": 24572, "tid": "24610",
    "ts": 1621401187231465, "dur": 6,
    "args": {
       "Device": 24572, "External id": 115,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187231534, "dur": 72,
    "args": {
       "Device": 24572, "External id": 117,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mm", "pid": 24572, "tid": "24610",
    "ts": 1621401187231491, "dur": 225,
    "args": {
       "Device": 24572, "External id": 116,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "MmBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187230626, "dur": 1124,
    "args": {
       "Device": 24572, "External id": 102,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 29
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24610",
    "ts": 1621401187231992, "dur": 61,
    "args": {
       "Device": 24572, "External id": 120,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::reshape", "pid": 24572, "tid": "24610",
    "ts": 1621401187231970, "dur": 108,
    "args": {
       "Device": 24572, "External id": 119,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "ViewBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187231941, "dur": 166,
    "args": {
       "Device": 24572, "External id": 118,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 28
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187232305, "dur": 21,
    "args": {
       "Device": 24572, "External id": 124,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187232286, "dur": 62,
    "args": {
       "Device": 24572, "External id": 123,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187232265, "dur": 123,
    "args": {
       "Device": 24572, "External id": 122,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "TBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187232239, "dur": 161,
    "args": {
       "Device": 24572, "External id": 121,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 27
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24610",
    "ts": 1621401187232535, "dur": 85,
    "args": {
       "Device": 24572, "External id": 126,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610",
    "ts": 1621401187232515, "dur": 148,
    "args": {
       "Device": 24572, "External id": 125,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187232790, "dur": 47,
    "args": {
       "Device": 24572, "External id": 129,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::fill_", "pid": 24572, "tid": "24610",
    "ts": 1621401187232866, "dur": 68,
    "args": {
       "Device": 24572, "External id": 130,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::scalar_tensor", "pid": 24572, "tid": "24610",
    "ts": 1621401187232776, "dur": 174,
    "args": {
       "Device": 24572, "External id": 128,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187233023, "dur": 27,
    "args": {
       "Device": 24572, "External id": 132,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_local_scalar_dense", "pid": 24572, "tid": "24610",
    "ts": 1621401187233192, "dur": 6,
    "args": {
       "Device": 24572, "External id": 135,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::item", "pid": 24572, "tid": "24610",
    "ts": 1621401187233184, "dur": 24,
    "args": {
       "Device": 24572, "External id": 134,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::resize_", "pid": 24572, "tid": "24610",
    "ts": 1621401187233251, "dur": 41,
    "args": {
       "Device": 24572, "External id": 136,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::ge", "pid": 24572, "tid": "24610",
    "ts": 1621401187233168, "dur": 182,
    "args": {
       "Device": 24572, "External id": 133,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::ge", "pid": 24572, "tid": "24610",
    "ts": 1621401187232971, "dur": 404,
    "args": {
       "Device": 24572, "External id": 131,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187233430, "dur": 15,
    "args": {
       "Device": 24572, "External id": 139,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::expand", "pid": 24572, "tid": "24610",
    "ts": 1621401187233414, "dur": 62,
    "args": {
       "Device": 24572, "External id": 138,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187233508, "dur": 10,
    "args": {
       "Device": 24572, "External id": 141,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::expand", "pid": 24572, "tid": "24610",
    "ts": 1621401187233494, "dur": 48,
    "args": {
       "Device": 24572, "External id": 140,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187233571, "dur": 10,
    "args": {
       "Device": 24572, "External id": 143,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::expand", "pid": 24572, "tid": "24610",
    "ts": 1621401187233558, "dur": 43,
    "args": {
       "Device": 24572, "External id": 142,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187233649, "dur": 46,
    "args": {
       "Device": 24572, "External id": 145,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::_s_where", "pid": 24572, "tid": "24610",
    "ts": 1621401187233620, "dur": 167,
    "args": {
       "Device": 24572, "External id": 144,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::where", "pid": 24572, "tid": "24610",
    "ts": 1621401187233398, "dur": 409,
    "args": {
       "Device": 24572, "External id": 137,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "ClampBackward1", "pid": 24572, "tid": "24610",
    "ts": 1621401187232724, "dur": 1110,
    "args": {
       "Device": 24572, "External id": 127,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 26
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "AddBackward1", "pid": 24572, "tid": "24610",
    "ts": 1621401187233941, "dur": 12,
    "args": {
       "Device": 24572, "External id": 146,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 25
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187234021, "dur": 46,
    "args": {
       "Device": 24572, "External id": 148,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::sum", "pid": 24572, "tid": "24610",
    "ts": 1621401187233990, "dur": 182,
    "args": {
       "Device": 24572, "External id": 147,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24610",
    "ts": 1621401187234208, "dur": 43,
    "args": {
       "Device": 24572, "External id": 149,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24610",
    "ts": 1621401187234378, "dur": 84,
    "args": {
       "Device": 24572, "External id": 151,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610",
    "ts": 1621401187234357, "dur": 144,
    "args": {
       "Device": 24572, "External id": 150,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::view", "pid": 24572, "tid": "24610",
    "ts": 1621401187234593, "dur": 39,
    "args": {
       "Device": 24572, "External id": 154,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::reshape", "pid": 24572, "tid": "24610",
    "ts": 1621401187234580, "dur": 67,
    "args": {
       "Device": 24572, "External id": 153,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187234561, "dur": 92,
    "args": {
       "Device": 24572, "External id": 152,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 24
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187234803, "dur": 14,
    "args": {
       "Device": 24572, "External id": 158,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187234792, "dur": 41,
    "args": {
       "Device": 24572, "External id": 157,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187234778, "dur": 79,
    "args": {
       "Device": 24572, "External id": 156,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::conj", "pid": 24572, "tid": "24610",
    "ts": 1621401187234874, "dur": 4,
    "args": {
       "Device": 24572, "External id": 159,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24610",
    "ts": 1621401187234918, "dur": 47,
    "args": {
       "Device": 24572, "External id": 161,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::mm", "pid": 24572, "tid": "24610",
    "ts": 1621401187234890, "dur": 149,
    "args": {
       "Device": 24572, "External id": 160,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187235092, "dur": 15,
    "args": {
       "Device": 24572, "External id": 164,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187235080, "dur": 39,
    "args": {
       "Device": 24572, "External id": 163,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187235067, "dur": 75,
    "args": {
       "Device": 24572, "External id": 162,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "MmBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187234734, "dur": 424,
    "args": {
       "Device": 24572, "External id": 155,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 23
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::as_strided", "pid": 24572, "tid": "24610",
    "ts": 1621401187235312, "dur": 13,
    "args": {
       "Device": 24572, "External id": 168,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::transpose", "pid": 24572, "tid": "24610",
    "ts": 1621401187235301, "dur": 40,
    "args": {
       "Device": 24572, "External id": 167,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::t", "pid": 24572, "tid": "24610",
    "ts": 1621401187235288, "dur": 78,
    "args": {
       "Device": 24572, "External id": 166,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "TBackward", "pid": 24572, "tid": "24610",
    "ts": 1621401187235271, "dur": 103,
    "args": {
       "Device": 24572, "External id": 165,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 ,
       "Fwd thread id": 1, "Sequence number": 22
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24610",
    "ts": 1621401187235487, "dur": 85,
    "args": {
       "Device": 24572, "External id": 170,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610",
    "ts": 1621401187235467, "dur": 147,
    "args": {
       "Device": 24572, "External id": 169,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187235803, "dur": 24,
    "args": {
       "Device": 24572, "External id": 172,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zero_", "pid": 24572, "tid": "24572",
    "ts": 1621401187235850, "dur": 5,
    "args": {
       "Device": 24572, "External id": 173,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::zeros", "pid": 24572, "tid": "24572",
    "ts": 1621401187235787, "dur": 75,
    "args": {
       "Device": 24572, "External id": 171,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::empty", "pid": 24572, "tid": "24572",
    "ts": 1621401187235954, "dur": 20,
    "args": {
       "Device": 24572, "External id": 175,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187236091, "dur": 82,
    "args": {
       "Device": 24572, "External id": 176,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187236221, "dur": 70,
    "args": {
       "Device": 24572, "External id": 177,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187236334, "dur": 68,
    "args": {
       "Device": 24572, "External id": 178,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "aten::add_", "pid": 24572, "tid": "24572",
    "ts": 1621401187236444, "dur": 68,
    "args": {
       "Device": 24572, "External id": 179,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "Optimizer.step#SGD.step", "pid": 24572, "tid": "24572",
    "ts": 1621401187235935, "dur": 663,
    "args": {
       "Device": 24572, "External id": 174,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Operator", 
    "name": "ProfilerStep#2", "pid": 24572, "tid": "24572",
    "ts": 1621401187223358, "dur": 13410,
    "args": {
       "Device": 24572, "External id": 4,
       "Trace name": "PyTorch Profiler", "Trace iteration": 0 
       
    }
  },
  {
    "ph": "X", "cat": "Memcpy", 
    "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187224556, "dur": 1,
    "args": {
      "device": 0, "context": 1,
      "stream": 7, "correlation": 311, "external id": 31,
      "bytes": 640, "memory bandwidth (GB/s)": 0.46511627906976744
    }
  },
  {
    "ph": "f", "id": 311, "pid": 0, "tid": "stream 7", "ts": 1621401187224556,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572",
    "ts": 1621401187224533, "dur": 20,
    "args": {
      "cbid": 41, "correlation": 311,
      "external id": 31, "external ts": 1621401187224496
    }
  },
  {
    "ph": "s", "id": 311, "pid": 24572, "tid": 24572, "ts": 1621401187224533,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572",
    "ts": 1621401187224554, "dur": 8,
    "args": {
      "cbid": 131, "correlation": 312,
      "external id": 31, "external ts": 1621401187224496
    }
  },
  {
    "ph": "X", "cat": "Memcpy", 
    "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187224767, "dur": 1,
    "args": {
      "device": 0, "context": 1,
      "stream": 7, "correlation": 323, "external id": 34,
      "bytes": 128, "memory bandwidth (GB/s)": 0.09523809523809523
    }
  },
  {
    "ph": "f", "id": 323, "pid": 0, "tid": "stream 7", "ts": 1621401187224767,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572",
    "ts": 1621401187224752, "dur": 12,
    "args": {
      "cbid": 41, "correlation": 323,
      "external id": 34, "external ts": 1621401187224720
    }
  },
  {
    "ph": "s", "id": 323, "pid": 24572, "tid": 24572, "ts": 1621401187224752,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572",
    "ts": 1621401187224765, "dur": 7,
    "args": {
      "cbid": 131, "correlation": 324,
      "external id": 34, "external ts": 1621401187224720
    }
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572",
    "ts": 1621401187225253, "dur": 2,
    "args": {
      "cbid": 251, "correlation": 332,
      "external id": 41, "external ts": 1621401187225112
    }
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void gemmSN_TN_kernel_64addr<float, 128, 16, 2, 4, 8, 9, false, cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float> >(cublasGemmSmallNParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187225275, "dur": 3,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 333, "external id": 41,
      "registers per thread": 72,
      "shared memory": 13824,
      "blocks per SM": 0.025,
      "warps per SM": 0.1,
      "grid": [1, 2, 1],
      "block": [128, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 333, "pid": 0, "tid": "stream 7", "ts": 1621401187225275,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187225258, "dur": 16,
    "args": {
      "cbid": 211, "correlation": 333,
      "external id": 41, "external ts": 1621401187225112
    }
  },
  {
    "ph": "s", "id": 333, "pid": 24572, "tid": 24572, "ts": 1621401187225258,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::unrolled_elementwise_kernel<at::native::AddFunctor<float>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187225530, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 338, "external id": 45,
      "registers per thread": 22,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 338, "pid": 0, "tid": "stream 7", "ts": 1621401187225530,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187225512, "dur": 16,
    "args": {
      "cbid": 211, "correlation": 338,
      "external id": 45, "external ts": 1621401187225449
    }
  },
  {
    "ph": "s", "id": 338, "pid": 24572, "tid": 24572, "ts": 1621401187225512,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187225820, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 352, "external id": 49,
      "registers per thread": 18,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 352, "pid": 0, "tid": "stream 7", "ts": 1621401187225820,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187225803, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 352,
      "external id": 49, "external ts": 1621401187225721
    }
  },
  {
    "ph": "s", "id": 352, "pid": 24572, "tid": 24572, "ts": 1621401187225803,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572",
    "ts": 1621401187226305, "dur": 2,
    "args": {
      "cbid": 251, "correlation": 363,
      "external id": 57, "external ts": 1621401187226161
    }
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void gemmSN_TN_kernel_64addr<float, 128, 16, 2, 4, 8, 9, false, cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float> >(cublasGemmSmallNParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187226325, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 364, "external id": 57,
      "registers per thread": 72,
      "shared memory": 13824,
      "blocks per SM": 0.025,
      "warps per SM": 0.1,
      "grid": [1, 2, 1],
      "block": [128, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 364, "pid": 0, "tid": "stream 7", "ts": 1621401187226325,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187226309, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 364,
      "external id": 57, "external ts": 1621401187226161
    }
  },
  {
    "ph": "s", "id": 364, "pid": 24572, "tid": 24572, "ts": 1621401187226309,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::unrolled_elementwise_kernel<at::native::AddFunctor<float>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187226575, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 369, "external id": 61,
      "registers per thread": 22,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 369, "pid": 0, "tid": "stream 7", "ts": 1621401187226575,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187226558, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 369,
      "external id": 61, "external ts": 1621401187226497
    }
  },
  {
    "ph": "s", "id": 369, "pid": 24572, "tid": 24572, "ts": 1621401187226558,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array<char*, 3> >(int, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187226912, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 377, "external id": 63,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 377, "pid": 0, "tid": "stream 7", "ts": 1621401187226912,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187226895, "dur": 16,
    "args": {
      "cbid": 211, "correlation": 377,
      "external id": 63, "external ts": 1621401187226753
    }
  },
  {
    "ph": "s", "id": 377, "pid": 24572, "tid": 24572, "ts": 1621401187226895,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187227092, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 388, "external id": 65,
      "registers per thread": 32,
      "shared memory": 16,
      "blocks per SM": 0.0125,
      "warps per SM": 0.0125,
      "grid": [1, 1, 1],
      "block": [32, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 388, "pid": 0, "tid": "stream 7", "ts": 1621401187227092,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187227075, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 388,
      "external id": 65, "external ts": 1621401187226930
    }
  },
  {
    "ph": "s", "id": 388, "pid": 24572, "tid": 24572, "ts": 1621401187227075,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187227619, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 395, "external id": 74,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 395, "pid": 0, "tid": "stream 7", "ts": 1621401187227619,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187227601, "dur": 16,
    "args": {
      "cbid": 211, "correlation": 395,
      "external id": 74, "external ts": 1621401187227576
    }
  },
  {
    "ph": "s", "id": 395, "pid": 24572, "tid": 24572, "ts": 1621401187227601,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187227745, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 402, "external id": 76,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 402, "pid": 0, "tid": "stream 7", "ts": 1621401187227745,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187227729, "dur": 14,
    "args": {
      "cbid": 211, "correlation": 402,
      "external id": 76, "external ts": 1621401187227707
    }
  },
  {
    "ph": "s", "id": 402, "pid": 24572, "tid": 24572, "ts": 1621401187227729,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187227859, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 409, "external id": 78,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 409, "pid": 0, "tid": "stream 7", "ts": 1621401187227859,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187227844, "dur": 13,
    "args": {
      "cbid": 211, "correlation": 409,
      "external id": 78, "external ts": 1621401187227823
    }
  },
  {
    "ph": "s", "id": 409, "pid": 24572, "tid": 24572, "ts": 1621401187227844,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187227973, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 416, "external id": 80,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 416, "pid": 0, "tid": "stream 7", "ts": 1621401187227973,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187227958, "dur": 13,
    "args": {
      "cbid": 211, "correlation": 416,
      "external id": 80, "external ts": 1621401187227937
    }
  },
  {
    "ph": "s", "id": 416, "pid": 24572, "tid": 24572, "ts": 1621401187227958,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187228279, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 429, "external id": 84,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 429, "pid": 0, "tid": "stream 7", "ts": 1621401187228279,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187228262, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 429,
      "external id": 84, "external ts": 1621401187228235
    }
  },
  {
    "ph": "s", "id": 429, "pid": 24572, "tid": 24572, "ts": 1621401187228262,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187228962, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 440, "external id": 91,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 440, "pid": 0, "tid": "stream 7", "ts": 1621401187228962,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187228932, "dur": 30,
    "args": {
      "cbid": 211, "correlation": 440,
      "external id": 91, "external ts": 1621401187228885
    }
  },
  {
    "ph": "s", "id": 440, "pid": 24572, "tid": 24610, "ts": 1621401187228932,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::unrolled_elementwise_kernel<at::native::mse_backward_cuda_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float, float)#1}, at::detail::Array<char*, 4>, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::mse_backward_cuda_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float, float)#1}, at::detail::Array<char*, 4>, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187229153, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 446, "external id": 92,
      "registers per thread": 28,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 446, "pid": 0, "tid": "stream 7", "ts": 1621401187229153,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187229127, "dur": 26,
    "args": {
      "cbid": 211, "correlation": 446,
      "external id": 92, "external ts": 1621401187229048
    }
  },
  {
    "ph": "s", "id": 446, "pid": 24572, "tid": 24610, "ts": 1621401187229127,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::reduce_kernel<256, 2, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187229711, "dur": 4,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 460, "external id": 94,
      "registers per thread": 35,
      "shared memory": 16,
      "blocks per SM": 0.0125,
      "warps per SM": 0.00625,
      "grid": [1, 1, 1],
      "block": [1, 16, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 460, "pid": 0, "tid": "stream 7", "ts": 1621401187229711,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187229681, "dur": 30,
    "args": {
      "cbid": 211, "correlation": 460,
      "external id": 94, "external ts": 1621401187229459
    }
  },
  {
    "ph": "s", "id": 460, "pid": 24572, "tid": 24610, "ts": 1621401187229681,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187230162, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 467, "external id": 98,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 467, "pid": 0, "tid": "stream 7", "ts": 1621401187230162,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187230133, "dur": 29,
    "args": {
      "cbid": 211, "correlation": 467,
      "external id": 98, "external ts": 1621401187230059
    }
  },
  {
    "ph": "s", "id": 467, "pid": 24572, "tid": 24610, "ts": 1621401187230133,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610",
    "ts": 1621401187231063, "dur": 4,
    "args": {
      "cbid": 251, "correlation": 480,
      "external id": 107, "external ts": 1621401187230889
    }
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610",
    "ts": 1621401187231069, "dur": 1,
    "args": {
      "cbid": 251, "correlation": 481,
      "external id": 107, "external ts": 1621401187230889
    }
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7",
    "ts": 1621401187231100, "dur": 3,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 482, "external id": 107,
      "registers per thread": 55,
      "shared memory": 16384,
      "blocks per SM": 0.0125,
      "warps per SM": 0.1,
      "grid": [1, 1, 1],
      "block": [256, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 482, "pid": 0, "tid": "stream 7", "ts": 1621401187231100,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187231073, "dur": 27,
    "args": {
      "cbid": 211, "correlation": 482,
      "external id": 107, "external ts": 1621401187230889
    }
  },
  {
    "ph": "s", "id": 482, "pid": 24572, "tid": 24610, "ts": 1621401187231073,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610",
    "ts": 1621401187231658, "dur": 3,
    "args": {
      "cbid": 251, "correlation": 491,
      "external id": 116, "external ts": 1621401187231491
    }
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void gemmSN_NN_kernel<float, 256, 4, 2, 8, 4, 4, false, cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float> >(cublasGemmSmallNParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187231692, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 492, "external id": 116,
      "registers per thread": 64,
      "shared memory": 12288,
      "blocks per SM": 0.05,
      "warps per SM": 0.4,
      "grid": [1, 4, 1],
      "block": [256, 1, 1],
      "theoretical occupancy %": 1
    }
  },
  {
    "ph": "f", "id": 492, "pid": 0, "tid": "stream 7", "ts": 1621401187231692,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187231665, "dur": 27,
    "args": {
      "cbid": 211, "correlation": 492,
      "external id": 116, "external ts": 1621401187231491
    }
  },
  {
    "ph": "s", "id": 492, "pid": 24572, "tid": 24610, "ts": 1621401187231665,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187232603, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 503, "external id": 126,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 503, "pid": 0, "tid": "stream 7", "ts": 1621401187232603,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187232583, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 503,
      "external id": 126, "external ts": 1621401187232535
    }
  },
  {
    "ph": "s", "id": 503, "pid": 24572, "tid": 24610, "ts": 1621401187232583,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187232921, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 513, "external id": 130,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 513, "pid": 0, "tid": "stream 7", "ts": 1621401187232921,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187232901, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 513,
      "external id": 130, "external ts": 1621401187232866
    }
  },
  {
    "ph": "s", "id": 513, "pid": 24572, "tid": 24610, "ts": 1621401187232901,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor<at::native::CompareGEFunctor<float> >, at::detail::Array<char*, 2> >(int, at::native::BUnaryFunctor<at::native::CompareGEFunctor<float> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187233342, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 526, "external id": 133,
      "registers per thread": 16,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 526, "pid": 0, "tid": "stream 7", "ts": 1621401187233342,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187233323, "dur": 18,
    "args": {
      "cbid": 211, "correlation": 526,
      "external id": 133, "external ts": 1621401187233168
    }
  },
  {
    "ph": "s", "id": 526, "pid": 24572, "tid": 24610, "ts": 1621401187233323,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::unrolled_elementwise_kernel<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&, c10::ScalarType)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array<char*, 4>, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&, c10::ScalarType)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array<char*, 4>, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187233770, "dur": 2,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 535, "external id": 144,
      "registers per thread": 26,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 535, "pid": 0, "tid": "stream 7", "ts": 1621401187233770,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187233751, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 535,
      "external id": 144, "external ts": 1621401187233620
    }
  },
  {
    "ph": "s", "id": 535, "pid": 24572, "tid": 24610, "ts": 1621401187233751,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187234156, "dur": 3,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 548, "external id": 147,
      "registers per thread": 32,
      "shared memory": 16,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [4, 16, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 548, "pid": 0, "tid": "stream 7", "ts": 1621401187234156,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187234135, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 548,
      "external id": 147, "external ts": 1621401187233990
    }
  },
  {
    "ph": "s", "id": 548, "pid": 24572, "tid": 24610, "ts": 1621401187234135,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187234445, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 555, "external id": 151,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 555, "pid": 0, "tid": "stream 7", "ts": 1621401187234445,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187234425, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 555,
      "external id": 151, "external ts": 1621401187234378
    }
  },
  {
    "ph": "s", "id": 555, "pid": 24572, "tid": 24610, "ts": 1621401187234425,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610",
    "ts": 1621401187235000, "dur": 2,
    "args": {
      "cbid": 251, "correlation": 568,
      "external id": 160, "external ts": 1621401187234890
    }
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610",
    "ts": 1621401187235004, "dur": 0,
    "args": {
      "cbid": 251, "correlation": 569,
      "external id": 160, "external ts": 1621401187234890
    }
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7",
    "ts": 1621401187235025, "dur": 3,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 570, "external id": 160,
      "registers per thread": 55,
      "shared memory": 16384,
      "blocks per SM": 0.0125,
      "warps per SM": 0.1,
      "grid": [1, 1, 1],
      "block": [256, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 570, "pid": 0, "tid": "stream 7", "ts": 1621401187235025,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187235006, "dur": 17,
    "args": {
      "cbid": 211, "correlation": 570,
      "external id": 160, "external ts": 1621401187234890
    }
  },
  {
    "ph": "s", "id": 570, "pid": 24572, "tid": 24610, "ts": 1621401187235006,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187235555, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 579, "external id": 170,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 579, "pid": 0, "tid": "stream 7", "ts": 1621401187235555,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610",
    "ts": 1621401187235535, "dur": 19,
    "args": {
      "cbid": 211, "correlation": 579,
      "external id": 170, "external ts": 1621401187235487
    }
  },
  {
    "ph": "s", "id": 579, "pid": 24572, "tid": 24610, "ts": 1621401187235535,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187236158, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 585, "external id": 176,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 585, "pid": 0, "tid": "stream 7", "ts": 1621401187236158,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187236138, "dur": 18,
    "args": {
      "cbid": 211, "correlation": 585,
      "external id": 176, "external ts": 1621401187236091
    }
  },
  {
    "ph": "s", "id": 585, "pid": 24572, "tid": 24572, "ts": 1621401187236138,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187236278, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 590, "external id": 177,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 590, "pid": 0, "tid": "stream 7", "ts": 1621401187236278,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187236261, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 590,
      "external id": 177, "external ts": 1621401187236221
    }
  },
  {
    "ph": "s", "id": 590, "pid": 24572, "tid": 24572, "ts": 1621401187236261,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187236390, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 595, "external id": 178,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 595, "pid": 0, "tid": "stream 7", "ts": 1621401187236390,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187236373, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 595,
      "external id": 178, "external ts": 1621401187236334
    }
  },
  {
    "ph": "s", "id": 595, "pid": 24572, "tid": 24572, "ts": 1621401187236373,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Kernel", 
    "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::AddFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": "stream 7",
    "ts": 1621401187236501, "dur": 1,
    "args": {
      "queued": 0, "device": 0, "context": 1,
      "stream": 7, "correlation": 600, "external id": 179,
      "registers per thread": 20,
      "shared memory": 0,
      "blocks per SM": 0.0125,
      "warps per SM": 0.025,
      "grid": [1, 1, 1],
      "block": [64, 1, 1],
      "theoretical occupancy %": 0
    }
  },
  {
    "ph": "f", "id": 600, "pid": 0, "tid": "stream 7", "ts": 1621401187236501,
    "cat": "async", "name": "launch", "bp": "e"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572",
    "ts": 1621401187236483, "dur": 15,
    "args": {
      "cbid": 211, "correlation": 600,
      "external id": 179, "external ts": 1621401187236444
    }
  },
  {
    "ph": "s", "id": 600, "pid": 24572, "tid": 24572, "ts": 1621401187236483,
    "cat": "async", "name": "launch"
  },
  {
    "ph": "X", "cat": "Runtime", 
    "name": "cudaDeviceSynchronize", "pid": 24572, "tid": "24572",
    "ts": 1621401187236853, "dur": 10,
    "args": {
      "cbid": 165, "correlation": 605,
      "external id": 0, "external ts": 0
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": 0,
    "args": {
      "labels": "CPU"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0,
    "args": {
      "labels": "GPU 0"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0,
    "args": {
      "labels": "GPU 1"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0,
    "args": {
      "labels": "GPU 2"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0,
    "args": {
      "labels": "GPU 3"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0,
    "args": {
      "labels": "GPU 4"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0,
    "args": {
      "labels": "GPU 5"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0,
    "args": {
      "labels": "GPU 6"
    }
  },
  {
    "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 7, "tid": 0,
    "args": {
      "name": "python"
    }
  },
  {
    "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 7, "tid": 0,
    "args": {
      "labels": "GPU 7"
    }
  },
  {
    "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24610",
    "args": {
      "name": "thread 24610 (python)"
    }
  },
  {
    "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24572",
    "args": {
      "name": "thread 24572 (python)"
    }
  },
  {
    "ph": "X", "cat": "Trace", "ts": 1621401187223005, "dur": 13896,
    "pid": "Traces", "tid": "PyTorch Profiler",
    "name": "PyTorch Profiler (0)",
    "args": {
      "Op count": 0
    }
  },
  {
    "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g",
    "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1621401187223005
  },
  {
    "name": "Record Window End", "ph": "i", "s": "g",
    "pid": "", "tid": "", "ts": 1621401187237108
  }
, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":1}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":0.0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187224005, "args":{"GPU Utilization":0.0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187225005, "args":{"GPU Utilization":0.6}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187226005, "args":{"GPU Utilization":0.5}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187227005, "args":{"GPU Utilization":0.6}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187228005, "args":{"GPU Utilization":0.2}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187229005, "args":{"GPU Utilization":0.6}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187230005, "args":{"GPU Utilization":0.1}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187231005, "args":{"GPU Utilization":0.5}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187232005, "args":{"GPU Utilization":0.2}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187233005, "args":{"GPU Utilization":0.3}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187234005, "args":{"GPU Utilization":0.4}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187235005, "args":{"GPU Utilization":0.4219409282700422}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187236901, "args":{"GPU Utilization":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187223005, "args":{"Est. SM Efficiency":1}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187223005, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225275, "args":{"Est. SM Efficiency":0.25}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225278, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225530, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225532, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225820, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225821, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226325, "args":{"Est. SM Efficiency":0.25}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226327, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226575, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226577, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226912, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226913, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227092, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227094, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227619, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227620, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227745, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227746, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227859, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227860, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227973, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227974, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228279, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228280, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228962, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228963, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229153, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229155, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229711, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229715, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187230162, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187230163, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231100, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231103, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231692, "args":{"Est. SM Efficiency":0.5}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231694, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232603, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232604, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232921, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232922, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233342, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233343, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233770, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233772, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234156, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234159, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234445, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234446, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235025, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235028, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235555, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235556, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236158, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236159, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236278, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236279, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236390, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236391, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236501, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236502, "args":{"Est. SM Efficiency":0}}]}