Network,Level,Name,Calls_20230829,Avg(us)_20230829,Max(us)_20230829,Min(us)_20230829,Calls_20230908,Avg(us)_20230908,Max(us)_20230908,Min(us)_20230908,Calls_20230913_0,Avg(us)_20230913_0,Max(us)_20230913_0,Min(us)_20230913_0,Calls_20230913_1,Avg(us)_20230913_1,Max(us)_20230913_1,Min(us)_20230913_1,Calls_20230914,Avg(us)_20230914,Max(us)_20230914,Min(us)_20230914,Calls_20230918,Avg(us)_20230918,Max(us)_20230918,Min(us)_20230918,Calls_20230919,Avg(us)_20230919,Max(us)_20230919,Min(us)_20230919
output_gpu,deepspeech2,Fused_ReduceMean_383684264054752582_kernel,1001.0,1.477,3.4560000000000004,1.439,1001.0,1.476,1.984,1.439,10001.0,1.475,3.679,1.439,10001.0,1.475,3.679,1.439,10001.0,1.475,3.616,1.439,10001.0,1.474,1.952,1.439,10001.0,3.111000,3.808000,3.071000
output_gpu,deepspeech2,Fused_Mul_fusion_17060319833796989398_kernel,1001.0,1.388,4.096,1.344,1001.0,1.496,2.112,1.375,10001.0,1.385,4.192,1.3430000000000002,10001.0,1.385,4.192,1.3430000000000002,10001.0,1.385,4.32,1.344,10001.0,1.388,10.944,1.375,10001.0,3.164000,3.744000,3.103000
output_gpu,deepspeech2,Fused_Add_Transpose_split_11641939853427955900_kernel,1001.0,1790.406,1817.641,1763.401,1001.0,809.695,1744.455,800.1489999999999,10001.0,810.826,950.734,799.056,10001.0,810.826,950.734,799.056,10001.0,812.806,952.207,799.9540000000002,10001.0,345.771,1076.019,341.373,10001.0,369.38300000000004,373.468,367.709
output_gpu,deepspeech2,Fused_Mul_fusion_11586879053041252169_kernel,1001.0,1.39,3.232,1.344,1001.0,1.1909999999999998,1.7280000000000002,1.152,10001.0,1.388,3.2960000000000003,1.344,10001.0,1.388,3.2960000000000003,1.344,10001.0,1.388,3.36,1.344,10001.0,1.388,1.792,1.343,10001.0,3.033000,3.583000,2.975000
output_gpu,deepspeech2,Fused_BroadcastTo_inplace_assign_builder_7277678674400646887_kernel,1001.0,58.695,64.607,58.24,1001.0,58.103,61.632,57.728,10001.0,58.167,64.191,57.694,10001.0,58.167,64.191,57.694,10001.0,58.147,64.031,57.631,10001.0,58.165,64.47999999999999,57.599,10001.0,60.407000000000004,64.639,59.775
output_gpu,deepspeech2,Fused_Mul_fusion_9061280092631476395_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.369,2.016,1.3430000000000002,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.369,1.888,1.343,10001.0,3.018000,3.680000,2.943000
output_gpu,deepspeech2,Fused_Tanh_fusion_13338471314311102235_kernel,1001.0,332.211,338.42800000000005,331.163,1001.0,332.272,1006.93,330.81199999999995,10001.0,331.62800000000004,338.39399999999995,330.746,10001.0,331.62800000000004,338.39399999999995,330.746,10001.0,331.644,338.105,330.714,10001.0,331.726,1020.117,330.652,10001.0,334.272,337.981,333.149
output_gpu,deepspeech2,Fused_Add_Reshape_BroadcastTo_split_10032764076833323130_kernel,1001.0,376.766,383.387,375.068,1001.0,265.202,272.413,263.932,10001.0,270.414,277.339,269.083,10001.0,270.414,277.339,269.083,10001.0,270.425,277.46700000000004,268.891,10001.0,251.052,977.428,249.405,10001.0,273.847,276.67,272.31699999999995
output_gpu,deepspeech2,Fused_Mul_fusion_548583029185277258_kernel,1001.0,1.371,3.3280000000000003,1.3430000000000002,1001.0,1.375,6.912000000000001,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.423,1.343,10001.0,1.374,3.264,1.343,10001.0,3.019000,3.712000,2.943000
output_gpu,deepspeech2,Fused_Reshape_BroadcastTo_split_15501133766817128202_kernel,1001.0,270.742,277.91700000000003,269.309,1001.0,202.378,206.813,201.597,10001.0,210.151,221.468,208.572,10001.0,210.151,221.468,208.572,10001.0,210.314,220.86,208.733,10001.0,194.647,908.725,193.182,10001.0,215.75900000000001,225.406,214.078
output_gpu,deepspeech2,Fused_Mul_fusion_1090074150299699800_kernel,1001.0,1.308,4.064,1.279,1001.0,1.128,1.536,1.119,10001.0,1.307,4.352,1.279,10001.0,1.307,4.352,1.279,10001.0,1.307,4.096,1.279,10001.0,1.308,4.352,1.279,10001.0,3.106000,3.616000,3.040000
output_gpu,deepspeech2,Fused_Mul_fusion_5256756168418488859_kernel,1001.0,2.36,5.312,2.272,1001.0,2.37,2.88,2.271,10001.0,2.357,5.343,2.24,10001.0,2.357,5.343,2.24,10001.0,2.358,5.344,2.24,10001.0,2.202,2.784,2.175,10001.0,4.200000,4.992000,4.095000
output_gpu,deepspeech2,Fused_Tanh_Reshape_Transpose_fusion_18051463857331374662_kernel,1001.0,8199.457,8286.007,8101.177,1001.0,2066.388,3060.5,2040.738,10001.0,683.933,798.929,680.37,10001.0,683.933,798.929,680.37,10001.0,683.893,798.8660000000001,680.372,10001.0,2067.57,2810.559,2018.857,10001.0,2233.321,2282.3790000000004,2181.2290000000003
output_gpu,deepspeech2,Fused_BroadcastTo_Mul_Reshape_Mul_split_5747017665540302750_kernel,1001.0,3.282,6.303999999999999,3.168,1001.0,2.817,3.4560000000000004,2.719,10001.0,3.269,6.528,3.135,10001.0,3.269,6.528,3.135,10001.0,3.273,6.368,3.135,10001.0,3.287,3.936,3.136,10001.0,5.544000,6.304000,5.375000
output_gpu,bert_base,Fused_Reciprocal_split_9715664751193780275_kernel,1001.0,1.419,3.392,1.376,1001.0,1.418,1.984,1.376,10001.0,1.419,9.472,1.375,10001.0,1.419,9.472,1.375,10001.0,1.417,3.456,1.375,10001.0,1.417,6.335,1.375,10001.0,3.281000,4.384000,3.200000
output_gpu,bert_base,Fused_Cast_Mul_Mul_ReduceSum_Mul_split_3349199317900906134_kernel,1001.0,6.613,9.216,6.591,1001.0,5.712999999999999,6.5920000000000005,5.695,10001.0,2.47,5.696000000000001,2.431,10001.0,2.47,5.696000000000001,2.431,10001.0,2.469,5.696,2.431,10001.0,2.471,9.536,2.431,10001.0,4.111000,5.087000,4.063000
output_gpu,bert_base,Fused_Reshape_Neg_Mul_split_17795986835084215949_kernel,1001.0,1.241,3.968,1.215,1001.0,1.256,4.096,1.215,10001.0,1.236,4.159,1.215,10001.0,1.236,4.159,1.215,10001.0,1.236,4.0,1.215,10001.0,1.237,8.544,1.215,10001.0,3.167000,4.000000,3.103000
output_gpu,bert_base,Fused_Cast_split_14554913079053108469_kernel,1001.0,27.049,32.576,26.528,1001.0,27.054,32.959,26.528,10001.0,26.734,32.736000000000004,26.143,10001.0,26.734,32.736000000000004,26.143,10001.0,26.842,32.607,26.143,10001.0,25.171,29.375,24.16,10001.0,29.034999999999997,33.022999999999996,28.351000000000003
output_gpu,bert_base,Fused_Cast_BiasAdd_Dropout_Add_fusion_12471240422274247021_kernel,1001.0,55.095,59.679,54.047,1001.0,53.888000000000005,56.863,52.095,10001.0,53.04,59.231,50.815,10001.0,53.04,59.231,50.815,10001.0,53.023,59.199,50.847,10001.0,57.204,61.823,55.136,10001.0,55.922,60.48,54.368
output_gpu,bert_base,Fused_ReduceSum_split_9610770192294330038_kernel,1001.0,2.356,4.704,2.335,1001.0,2.354,3.232,2.335,10001.0,2.077,4.448,2.047,10001.0,2.077,4.448,2.047,10001.0,2.077,4.416,2.047,10001.0,1.784,4.608,1.759,10001.0,3.770000,4.768000,3.711000
output_gpu,bert_base,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Add_Mul_C_more_split_8760957184341881796_kernel,1001.0,42.328,45.31100000000001,41.727,1001.0,36.453,36.992,35.935,10001.0,21.469,24.992,19.903,10001.0,21.469,24.992,19.903,10001.0,12.141,15.136,10.944,10001.0,12.238,15.392,10.943,10001.0,13.579000,14.176000,13.408000
output_gpu,bert_base,Fused_Add_Reshape_Reshape_Add_split_14093257215561905173_kernel,1001.0,49.31399999999999,55.327,48.224,1001.0,47.427,53.791,46.912,10001.0,47.415,54.271,46.752,10001.0,47.415,54.271,46.752,10001.0,47.411,53.75899999999999,46.81500000000001,10001.0,47.699,52.0,46.88,10001.0,49.495999999999995,54.207,48.736000000000004
output_gpu,bert_base,Fused_Cast_BiasAdd_GeLU_fusion_12828253081971380468_kernel,1001.0,116.23,122.654,111.07,1001.0,102.028,105.598,101.247,10001.0,101.556,107.934,100.606,10001.0,101.556,107.934,100.606,10001.0,101.257,107.839,100.414,10001.0,101.15,104.895,99.839,10001.0,103.797,107.77499999999999,103.072
output_gpu,bert_base,Fused_Add_AddN_fusion_15025288824638236115_kernel,1001.0,42.28,48.352,41.599,1001.0,42.295,48.927,41.504,10001.0,41.873000000000005,48.479,41.087,10001.0,41.873000000000005,48.479,41.087,10001.0,42.019,48.67100000000001,41.12,10001.0,42.438,48.96,41.343,10001.0,43.981,48.512,43.007999999999996
output_gpu,bert_base,Fused_Reshape_LessEqual_Sub_LessEqual_LogicalOr_Select_Mul_Maximum_Select_fusion_1537099880519983217_kernel,1001.0,1.808,4.16,1.791,1001.0,1.808,4.224,1.791,10001.0,1.805,4.128,1.791,10001.0,1.805,4.128,1.791,10001.0,1.806,4.288,1.791,10001.0,1.805,2.624,1.791,10001.0,3.640000,4.672000,3.583000
output_gpu,bert_base,Fused_Mul_Mul_ReduceSum_Mul_split_7563720092882377888_kernel,1001.0,6.4110000000000005,8.767999999999999,6.24,1001.0,6.417999999999999,9.056,6.271,10001.0,2.675,5.888,2.655,10001.0,2.675,5.888,2.655,10001.0,2.675,5.952,2.655,10001.0,2.676,5.824,2.655,10001.0,4.704000,5.856000,4.639000
output_gpu,bert_base,Fused_Add_split_17831152759103575343_kernel,1001.0,1.242,3.967,1.215,1001.0,1.246,4.032,1.215,10001.0,1.241,3.968,1.215,10001.0,1.241,3.968,1.215,10001.0,1.24,4.032,1.215,10001.0,1.245,4.0,1.215,10001.0,2.964000,3.488000,2.911000
output_gpu,bert_base,Fused_ReduceSum_split_17160201757039515550_kernel,1001.0,64.8,76.767,63.807,1001.0,64.8,73.887,63.839,10001.0,63.85399999999999,75.51899999999999,63.13399999999999,10001.0,63.85399999999999,75.51899999999999,63.13399999999999,10001.0,63.822,75.007,63.071,10001.0,63.72899999999999,71.872,63.135,10001.0,66.314,75.36,65.599000
output_gpu,bert_base,Fused_Mul_Mul_ReduceSum_Mul_split_4931910940408955701_kernel,1001.0,6.444,8.96,6.4,1001.0,6.442,7.423999999999999,6.431,10001.0,2.494,5.855,2.463,10001.0,2.494,5.855,2.463,10001.0,2.494,6.016,2.463,10001.0,2.494,3.424,2.463,10001.0,4.135000,5.248000,4.063000
output_gpu,bert_base,Fused_Cast_fusion_17892869917064243969_kernel,1001.0,1.235,3.936,1.215,1001.0,1.233,1.568,1.215,10001.0,1.233,4.0,1.215,10001.0,1.233,4.0,1.215,10001.0,1.233,4.0,1.215,10001.0,1.232,1.6,1.215,10001.0,3.174000,3.808000,3.103000
output_gpu,bert_base,Fused_Add_fusion_10054391561945245381_kernel,1001.0,26.531,32.288000000000004,26.048,1001.0,26.519,30.143,26.08,10001.0,26.139,31.968000000000004,25.311,10001.0,26.139,31.968000000000004,25.311,10001.0,26.217,32.096,25.439,10001.0,27.65,31.552,26.527,10001.0,28.331000,31.904000,27.839000
output_gpu,bert_base,Fused_ReduceSum_Mul_split_847463926504720470_kernel,1001.0,18.027,30.303,17.664,1001.0,18.012999999999998,28.319000000000003,17.727,10001.0,18.105,28.288,17.248,10001.0,18.105,28.288,17.248,10001.0,18.026,28.895,17.184,10001.0,17.499000000000002,27.008,16.991,10001.0,19.960000,28.575,19.456000
output_gpu,bert_base,Fused_Cast_Add_Reshape_Transpose_split_8581509506651289405_kernel,1001.0,29.629,32.64,27.999,1001.0,23.435,29.311,22.688,10001.0,22.895,28.703000000000003,21.792,10001.0,22.895,28.703000000000003,21.792,10001.0,23.036,28.735,21.696,10001.0,75.163,743.7040000000001,71.071,10001.0,80.937,82.528,80.511
output_gpu,bert_base,Fused_Cast_Reshape_fusion_16187287696793412283_kernel,1001.0,1.341,4.064,1.311,1001.0,1.345,2.3040000000000003,1.311,10001.0,1.332,4.096,1.311,10001.0,1.332,4.096,1.311,10001.0,1.333,4.288,1.311,10001.0,1.331,1.696,1.311,10001.0,3.122000,3.616000,3.071000
output_gpu,bert_base,Fused_BroadcastTo_inplace_assign_builder_12101948072875812668_kernel,1001.0,1.223,2.9760000000000004,1.1840000000000002,1001.0,1.226,1.568,1.1840000000000002,10001.0,1.207,3.04,1.183,10001.0,1.207,3.04,1.183,10001.0,1.208,3.04,1.183,10001.0,1.207,1.568,1.183,10001.0,2.876000,3.616000,2.783000
output_gpu,bert_base,Fused_Mul_Mul_Cast_ReduceSum_split_10543761844475784408_kernel,1001.0,231.962,258.365,217.246,1001.0,231.507,256.571,217.788,10001.0,48.135,54.655,45.151,10001.0,48.135,54.655,45.151,10001.0,51.29,55.199,48.511,10001.0,50.33,52.384,48.416,10001.0,53.524000,56.448000,51.359000
output_gpu,bert_base,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_17865465815588720940_kernel,1001.0,18.542,30.272,18.048,1001.0,18.494,27.968000000000004,17.983,10001.0,23.587,31.999,21.632,10001.0,23.587,31.999,21.632,10001.0,23.692,32.703,21.728,10001.0,24.354,32.384,21.855,10001.0,26.327000,34.464,24.224000
output_gpu,bert_base,Fused_Add_fusion_11383007565367662187_kernel,1001.0,1.371,3.392,1.3430000000000002,1001.0,1.371,3.424,1.3430000000000002,10001.0,1.369,3.455,1.3430000000000002,10001.0,1.369,3.455,1.3430000000000002,10001.0,1.368,3.455,1.343,10001.0,1.175,1.728,1.151,10001.0,3.009000,3.712000,2.943000
output_gpu,bert_base,Fused_Cast_Mul_Mul_ReduceSum_Mul_split_10027531740183302338_kernel,1001.0,6.646,9.376,6.622999999999999,1001.0,5.7010000000000005,6.752000000000001,5.662999999999999,10001.0,2.46,4.832,2.431,10001.0,2.46,4.832,2.431,10001.0,2.46,4.863,2.431,10001.0,2.109,9.472,2.079,10001.0,4.111000,5.120000,4.032000
output_gpu,bert_base,Fused_Add_Cast_ReduceMax_split_16727254203272830108_kernel,1001.0,1.254,4.064,1.215,1001.0,1.259,2.08,1.216,10001.0,1.433,3.4560000000000004,1.4069999999999998,10001.0,1.433,3.4560000000000004,1.4069999999999998,10001.0,1.433,3.52,1.407,10001.0,1.437,1.952,1.407,10001.0,3.068000,3.776000,3.007000
output_gpu,bert_base,Fused_Mul_Mul_ReduceSum_Mul_split_7030311586437121275_kernel,1001.0,6.58,9.28,6.559,1001.0,6.58,9.248,6.559,10001.0,2.463,4.896,2.431,10001.0,2.463,4.896,2.431,10001.0,2.463,4.864,2.431,10001.0,2.463,4.8,2.431,10001.0,4.114000,5.087000,4.032000
output_gpu,bert_base,Fused_LogicalNot_LogicalAnd_Mul_Select_Assign_fusion_7177650377850726476_kernel,1001.0,1.587,3.744,1.567,1001.0,1.6269999999999998,2.4,1.599,10001.0,1.6269999999999998,4.032,1.599,10001.0,1.6269999999999998,4.032,1.599,10001.0,1.626,3.936,1.599,10001.0,1.405,2.048,1.375,10001.0,3.486000,4.352000,3.423000
output_gpu,bert_base,Fused_Cast_BiasAdd_GeLU_fusion_5307373862246741305_kernel,1001.0,5.433,8.288,5.343999999999999,1001.0,4.263,4.832,4.16,10001.0,4.949,8.0,4.8,10001.0,4.949,8.0,4.8,10001.0,4.955,8.031,4.832,10001.0,4.259,4.896,4.159,10001.0,6.776000,7.680000,6.591000
output_gpu,bert_base,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Add_Mul_C_more_split_5322106696941400025_kernel,1001.0,928.019,1354.735,841.461,1001.0,908.81,2079.193,834.448,10001.0,134.827,149.053,129.213,10001.0,134.827,149.053,129.213,10001.0,173.582,187.932,165.533,10001.0,173.445,884.278,165.95,10001.0,179.704,187.54999999999998,173.214000
output_gpu,bert_base,Fused_Cast_ReduceSum_split_9005003959610787421_kernel,1001.0,2.84,5.856,2.815,1001.0,2.837,3.648,2.815,10001.0,1.666,4.6080000000000005,1.631,10001.0,1.666,4.6080000000000005,1.631,10001.0,1.547,4.288,1.535,10001.0,1.549,9.12,1.535,10001.0,3.277000,3.776000,3.200000
output_gpu,bert_base,Fused_Mul_Mul_ReduceSum_Mul_split_14503017584508981378_kernel,1001.0,9.407,11.743,9.28,1001.0,8.074,8.832,7.968,10001.0,2.7310000000000003,6.047999999999999,2.687,10001.0,2.7310000000000003,6.047999999999999,2.687,10001.0,2.731,5.92,2.687,10001.0,2.732,5.856,2.687,10001.0,4.377000,5.280000,4.319000
output_gpu,bert_base,Fused_Reshape_Add_Cast_Reshape_fusion_1259340029514946964_kernel,1001.0,42.21,47.711000000000006,41.248000000000005,1001.0,41.239,45.503,40.735,10001.0,41.122,47.551,40.575,10001.0,41.122,47.551,40.575,10001.0,41.128,47.839,40.575,10001.0,45.775000000000006,49.439,43.808,10001.0,43.141,47.552,42.496000
output_gpu,bert_base,Fused_Cast_fusion_5207550064602223918_kernel,1001.0,3.425,6.367999999999999,3.359,1001.0,3.435,3.872,3.36,10001.0,3.418,6.272,3.359,10001.0,3.418,6.272,3.359,10001.0,3.429,6.528,3.359,10001.0,4.275,4.704,4.096,10001.0,5.203000,6.048000,5.088000
output_gpu,bert_base,Fused_Cast_fusion_7666694894108764436_kernel,1001.0,1.251,4.096,1.215,1001.0,1.132,1.472,1.119,10001.0,1.25,4.032,1.215,10001.0,1.25,4.032,1.215,10001.0,1.248,4.128,1.215,10001.0,1.074,9.216,1.055,10001.0,2.989000,3.745000,2.943000
output_gpu,bert_base,Fused_Cast_fusion_14392499370391399597_kernel,1001.0,3.3710000000000004,6.144,3.232,1001.0,3.379,3.744,3.264,10001.0,3.37,6.303999999999999,3.232,10001.0,3.37,6.303999999999999,3.232,10001.0,3.369,6.176,3.263,10001.0,2.605,9.216,2.528,10001.0,5.573000,6.239000,5.408000
output_gpu,bert_base,Fused_Cast_fusion_5292518620103935094_kernel,1001.0,128.825,134.783,127.967,1001.0,128.763,131.678,125.79,10001.0,125.76300000000002,134.27,124.125,10001.0,125.76300000000002,134.27,124.125,10001.0,125.6,134.654,123.997,10001.0,140.275,143.35899999999998,137.34199999999998,10001.0,129.599,132.479,128.383
output_gpu,bert_base,Fused_Cast_fusion_3949089336070214905_kernel,1001.0,20.222,26.175,19.776,1001.0,20.238,23.775,19.775,10001.0,20.17,25.856,19.264,10001.0,20.17,25.856,19.264,10001.0,19.949,26.208,19.103,10001.0,21.663,708.824,19.775,10001.0,22.109,26.048000000000002,21.119
output_gpu,bert_base,Fused_Sub_Exp_ReduceSum_split_5022209430239326148_kernel,1001.0,168.037,173.085,164.861,1001.0,169.12599999999998,1214.154,165.50099999999998,10001.0,140.58399999999995,148.254,139.709,10001.0,140.58399999999995,148.254,139.709,10001.0,140.623,148.094,139.677,10001.0,140.74,875.318,139.679,10001.0,142.833,148.15900000000002,142.047000
output_gpu,bert_base,Fused_Cast_fusion_3218767707791726405_kernel,1001.0,20.227,26.4,19.776,1001.0,20.218,23.712,19.807,10001.0,19.793000000000006,26.144,18.943,10001.0,19.793000000000006,26.144,18.943,10001.0,20.026,25.888,19.423,10001.0,21.773,25.472,20.224,10001.0,22.828999999999997,26.720000000000002,21.183
output_gpu,bert_base,Fused_Cast_BiasAdd_Tanh_fusion_1864264321745204290_kernel,1001.0,1.726,4.512,1.695,1001.0,1.442,1.76,1.4069999999999998,10001.0,1.676,4.5760000000000005,1.631,10001.0,1.676,4.5760000000000005,1.631,10001.0,1.676,4.448,1.632,10001.0,1.439,8.704,1.407,10001.0,3.420000,3.904000,3.359000
output_gpu,bert_base,Fused_Reshape_Cast_Cast_Reshape_Sub_Mul_split_2964756766907290183_kernel,1001.0,1.274,4.0,1.247,1001.0,1.281,2.048,1.247,10001.0,1.259,4.064,1.247,10001.0,1.259,4.064,1.247,10001.0,1.259,4.16,1.216,10001.0,1.084,1.408,1.055,10001.0,3.248000,4.033000,3.167000
output_gpu,bert_base,Fused_Reshape_Transpose_split_2202986300204861848_kernel,1001.0,28.704,32.288000000000004,27.168000000000003,1001.0,27.11,31.615,25.76,10001.0,22.459,28.639000000000006,21.183000000000003,10001.0,22.459,28.639000000000006,21.183000000000003,10001.0,22.286,28.863000000000003,21.12,10001.0,74.207,83.135,70.94300000000001,10001.0,81.97699999999999,89.631,79.743
output_gpu,bert_base,Fused_Transpose_split_562174498592760630_kernel,1001.0,28.606,32.384,28.032,1001.0,29.348000000000003,35.327,28.831,10001.0,24.369,29.92,22.975,10001.0,24.369,29.92,22.975,10001.0,24.44,30.08,22.879,10001.0,74.833,85.983,70.975,10001.0,80.63,81.503,80.319
output_gpu,bert_base,Fused_Cast_fusion_15293591300000660893_kernel,1001.0,3.895,6.88,3.839,1001.0,3.896,4.704,3.839,10001.0,3.893,6.976000000000001,3.839,10001.0,3.893,6.976000000000001,3.839,10001.0,3.892,7.04,3.839,10001.0,5.962,6.656,5.728,10001.0,6.135000,6.752000,6.047000
output_gpu,bert_base,Fused_Mul_Mul_ReduceSum_Mul_split_9989202327243490401_kernel,1001.0,1.439,3.4560000000000004,1.4069999999999998,1001.0,1.439,3.52,1.4069999999999998,10001.0,1.4369999999999998,8.767999999999999,1.4069999999999998,10001.0,1.4369999999999998,8.767999999999999,1.4069999999999998,10001.0,1.437,3.52,1.407,10001.0,1.437,1.984,1.407,10001.0,3.308000,4.032000,3.231000
output_gpu,transformer,Fused_Reciprocal_split_9715664751193780275_kernel,1001.0,1.418,3.424,1.376,1001.0,1.217,1.7280000000000002,1.183,10001.0,1.4169999999999998,3.488,1.375,10001.0,1.4169999999999998,3.488,1.375,10001.0,1.417,3.488,1.375,10001.0,1.217,2.24,1.183,10001.0,3.059000,3.808000,3.007000
output_gpu,transformer,Fused_Cast_ReduceSum_split_11562077826069526804_kernel,1001.0,312.498,331.611,287.709,1001.0,288.226,301.85,278.523,10001.0,40.966,51.711000000000006,40.287,10001.0,40.966,51.711000000000006,40.287,10001.0,40.754,51.775,40.095,10001.0,40.801,52.415000000000006,40.095,10001.0,42.931999999999995,51.871,42.336000
output_gpu,transformer,Fused_Cast_fusion_3779720031685675485_kernel,1001.0,34.696,40.256,34.208,1001.0,34.7,37.855,34.239000000000004,10001.0,34.099000000000004,40.479,32.991,10001.0,34.099000000000004,40.479,32.991,10001.0,34.221000000000004,40.288,33.055,10001.0,37.472,748.375,35.424,10001.0,36.544,40.224000000000004,35.647999999999996
output_gpu,transformer,Fused_Mul_Add_Cast_ReduceMax_split_15797943235040388018_kernel,1001.0,112.87,120.798,111.807,1001.0,99.243,104.798,97.534,10001.0,101.898,119.87,97.438,10001.0,101.898,119.87,97.438,10001.0,101.535,120.574,97.406,10001.0,99.582,797.941,97.343,10001.0,111.848,126.527,108.607000
output_gpu,transformer,Fused_Neg_Mul_ReduceSum_split_4248836288393418276_kernel,1001.0,2.975,5.312,2.943,1001.0,2.692,3.52,2.655,10001.0,2.24,5.343999999999999,2.207,10001.0,2.24,5.343999999999999,2.207,10001.0,2.24,5.344,2.207,10001.0,2.24,3.136,2.207,10001.0,4.253000,5.184000,4.191000
output_gpu,transformer,Fused_Sub_Exp_ReduceSum_split_15349239229028166055_kernel,1001.0,114.108,123.102,112.735,1001.0,114.073,119.71,112.478,10001.0,107.105,123.709,103.006,10001.0,107.105,123.709,103.006,10001.0,106.195,122.814,102.91,10001.0,107.042,800.118,103.007,10001.0,113.011,130.238,110.143000
output_gpu,transformer,Fused_Add_RealDiv_split_18428533263909187959_kernel,1001.0,1.486,3.488,1.471,1001.0,1.3430000000000002,2.016,1.311,10001.0,1.484,3.552,1.471,10001.0,1.484,3.552,1.471,10001.0,1.484,3.52,1.471,10001.0,1.484,2.016,1.471,10001.0,3.118000,3.905000,3.040000
output_gpu,transformer,Fused_ReduceSum_split_5286512164795600183_kernel,1001.0,87.191,92.415,83.551,1001.0,75.916,79.71,71.807,10001.0,21.265,30.687,20.672,10001.0,21.265,30.687,20.672,10001.0,21.328000000000003,31.264,20.832,10001.0,21.106,28.352,20.735,10001.0,23.084000,30.976,22.528000
output_gpu,transformer,Fused_Cast_fusion_11587648850823634713_kernel,1001.0,34.689,40.096,34.143,1001.0,33.596,36.831,32.959,10001.0,34.347,40.127,33.311,10001.0,34.347,40.127,33.311,10001.0,34.13999999999999,40.223,32.895,10001.0,37.394,39.967000000000006,35.392,10001.0,36.388,39.84,35.711
output_gpu,transformer,Fused_RealDiv_Cast_Mul_Cast_LessEqual_Cast_Mul_split_10301357873200668306_kernel,1001.0,173.127,179.486,172.31799999999998,1001.0,167.12099999999998,171.805,166.39700000000002,10001.0,167.268,174.71599999999998,166.429,10001.0,167.268,174.71599999999998,166.429,10001.0,167.283,174.78099999999998,166.525,10001.0,167.37099999999998,894.0369999999999,165.534,10001.0,170.007,175.583,169.08599999999998
output_gpu,transformer,Fused_Dropout_7533611284060738716_kernel,1001.0,225.21400000000003,230.493,220.573,1001.0,223.221,228.348,220.316,10001.0,221.026,231.868,219.132,10001.0,221.026,231.868,219.132,10001.0,221.022,231.612,219.292,10001.0,256.463,984.564,251.997,10001.0,227.03300000000002,231.13400000000001,225.182
output_gpu,transformer,Fused_Cast_BiasAdd_fusion_16929090800810496745_kernel,1001.0,127.688,136.19,120.511,1001.0,99.493,102.11,98.558,10001.0,96.783,104.158,95.39,10001.0,96.783,104.158,95.39,10001.0,96.67,104.703,95.007,10001.0,117.851,130.55800000000002,113.854,10001.0,100.611,104.062,99.551
output_gpu,transformer,Fused_Sub_Mul_Cast_Mul_split_9846109345606123648_kernel,1001.0,106.645,112.83,105.727,1001.0,105.449,109.822,104.51,10001.0,105.485,112.03,104.03,10001.0,105.485,112.03,104.03,10001.0,105.469,111.838,104.094,10001.0,105.728,776.725,104.382,10001.0,107.046,111.42299999999999,105.88799999999999
output_gpu,transformer,Fused_Sub_Mul_ReduceSum_split_9576479946113568716_kernel,1001.0,49.556,57.855,48.768,1001.0,48.694,53.695,48.063,10001.0,46.284,53.055,45.376000000000005,10001.0,46.284,53.055,45.376000000000005,10001.0,46.345,53.151,45.343,10001.0,46.147,50.112,45.279,10001.0,48.518,53.055,47.616000
output_gpu,transformer,Fused_DropoutGrad_15920076577436691012_kernel,1001.0,135.878,142.719,133.75799999999998,1001.0,135.458,142.045,133.11700000000002,10001.0,134.07399999999998,142.558,132.57299999999998,10001.0,134.07399999999998,142.558,132.57299999999998,10001.0,134.024,142.11,132.798,10001.0,143.625,859.349,137.95,10001.0,136.73399999999998,140.543,135.55
output_gpu,transformer,Fused_Mul_split_8987365791484717414_kernel,1001.0,1.227,3.904,1.1840000000000002,1001.0,1.111,1.44,1.087,10001.0,1.226,4.224,1.183,10001.0,1.226,4.224,1.183,10001.0,1.225,4.0,1.183,10001.0,1.227,1.696,1.184,10001.0,3.199000,3.777000,3.135000
output_gpu,transformer,Fused_Add_fusion_11383007565367662187_kernel,1001.0,1.37,3.392,1.3430000000000002,1001.0,1.239,1.76,1.215,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.455,1.343,10001.0,1.369,1.888,1.343,10001.0,3.019000,3.712000,2.943000
output_gpu,transformer,Fused_Add_Mul_Cast_LessEqual_Cast_Mul_Add_Cast_ReduceSum_split_3392888406530327560_kernel,1001.0,82.006,89.663,81.279,1001.0,81.381,87.96600000000001,80.542,10001.0,75.389,81.69500000000002,74.462,10001.0,75.389,81.69500000000002,74.462,10001.0,75.36800000000002,82.04700000000001,74.303,10001.0,75.429,82.943,74.495,10001.0,77.779,83.456,76.864000
output_gpu,transformer,Fused_LogicalNot_LogicalAnd_Mul_Select_Assign_fusion_7177650377850726476_kernel,1001.0,1.588,3.712,1.567,1001.0,1.4480000000000002,2.464,1.375,10001.0,1.6269999999999998,3.872,1.599,10001.0,1.6269999999999998,3.872,1.599,10001.0,1.627,3.84,1.599,10001.0,1.404,2.016,1.375,10001.0,3.475000,4.352000,3.423000
output_gpu,transformer,Fused_Reshape_Transpose_fusion_11781969851694400596_kernel,1001.0,35.505,38.944,33.983000000000004,1001.0,30.482,35.711,28.512,10001.0,28.308000000000003,35.135,26.912,10001.0,28.308000000000003,35.135,26.912,10001.0,28.89,34.271,27.392,10001.0,94.488,879.893,94.046,10001.0,106.182,106.81599999999999,105.887
output_gpu,transformer,Fused_Mul_Mul_Add_Cast_split_3672969340957471885_kernel,1001.0,38.38,43.327,37.343,1001.0,35.568000000000005,39.071,34.495,10001.0,35.953,41.087,34.687999999999995,10001.0,35.953,41.087,34.687999999999995,10001.0,36.075,41.599,34.784,10001.0,38.397,41.312000000000005,36.895,10001.0,38.468,42.271,37.28
output_gpu,transformer,Fused_ReduceSum_split_3588669061737312733_kernel,1001.0,22.214,32.224000000000004,21.6,1001.0,21.883000000000003,31.231,21.311,10001.0,21.583,32.416,21.088,10001.0,21.583,32.416,21.088,10001.0,21.749,33.023,21.247,10001.0,21.477,30.272,21.088,10001.0,23.733000,33.792,23.296000
output_gpu,transformer,Fused_Cast_fusion_10918899725122482136_kernel,1001.0,6.376,9.472,6.176,1001.0,5.49,6.656000000000001,5.343,10001.0,6.386,9.728,6.176,10001.0,6.386,9.728,6.176,10001.0,6.375,9.888,6.144,10001.0,9.467,10.464,8.352,10001.0,9.247000,10.303000,8.704000
output_gpu,transformer,Fused_Mul_Mul_Add_Add_Cast_Mul_Mul_Cast_ReduceSum_split_17113415292828551596_kernel,1001.0,801.948,978.58,608.985,1001.0,783.235,1701.984,598.932,10001.0,77.7,82.52600000000001,75.678,10001.0,77.7,82.52600000000001,75.678,10001.0,80.30199999999998,83.80600000000003,78.079,10001.0,80.59700000000001,768.822,78.175,10001.0,83.813000,87.64800000000001,81.695000
output_gpu,transformer,Fused_Mul_fusion_9061280092631476395_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.239,1.888,1.215,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,8.992,1.343,10001.0,1.369,1.888,1.343,10001.0,3.021000,3.712000,2.943000
output_gpu,transformer,Fused_Mul_fusion_13137481608468223955_kernel,1001.0,1.37,3.36,1.3430000000000002,1001.0,1.238,1.76,1.215,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.369,8.544,1.343,10001.0,3.014000,3.680000,2.943000
output_gpu,transformer,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_15211520463956997496_kernel,1001.0,1.379,4.064,1.3430000000000002,1001.0,1.379,4.288,1.344,10001.0,1.376,4.128,1.3430000000000002,10001.0,1.376,4.128,1.3430000000000002,10001.0,1.376,4.128,1.343,10001.0,1.379,4.352,1.343,10001.0,3.118000,3.648000,3.071000
output_gpu,transformer,Fused_Mul_Mul_Cast_Mul_ReduceSum_split_12144552849236552072_kernel,1001.0,134.745,144.543,132.862,1001.0,134.282,141.789,131.518,10001.0,132.985,144.829,130.718,10001.0,132.985,144.829,130.718,10001.0,132.96200000000002,144.15800000000004,130.557,10001.0,133.048,843.989,130.526,10001.0,135.731,141.535,133.758000
output_gpu,transformer,Fused_Mul_Add_Rsqrt_split_18088083862193468307_kernel,1001.0,1.273,3.967,1.247,1001.0,1.095,1.536,1.056,10001.0,1.272,4.031000000000001,1.247,10001.0,1.272,4.031000000000001,1.247,10001.0,1.272,4.16,1.247,10001.0,1.275,4.16,1.247,10001.0,3.032000,3.520000,2.944000
output_gpu,transformer,Fused_Mul_Mul_Mul_Mul_Add_Mul_Mul_Mul_Add_Mul_Add_Cast_Add_Mul_Mul_split_4158667062394560387_kernel,1001.0,99.442,104.223,95.263,1001.0,94.074,97.054,90.655,10001.0,93.991,100.478,91.23,10001.0,93.991,100.478,91.23,10001.0,93.977,99.294,90.942,10001.0,94.25,865.909,91.647,10001.0,98.467,102.11099999999999,96.255
output_gpu,transformer,Fused_Cast_fusion_1801388240467737586_kernel,1001.0,291.491,299.356,287.676,1001.0,286.954,976.526,284.635,10001.0,286.708,300.507,284.603,10001.0,286.708,300.507,284.603,10001.0,286.547,300.699,284.474,10001.0,322.409,1006.419,318.23600000000005,10001.0,293.977,297.214,292.605
output_gpu,transformer,Fused_Transpose_split_5250600446647218235_kernel,1001.0,33.389,37.088,32.448,1001.0,30.576,33.152,29.919,10001.0,28.321,34.271,27.072,10001.0,28.321,34.271,27.072,10001.0,28.592,33.919000000000004,27.071,10001.0,94.315,94.751,94.079,10001.0,106.176,106.783,105.855
output_gpu,transformer,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.392,1.3430000000000002,1001.0,1.421,2.144,1.4069999999999998,10001.0,1.421,3.488,1.376,10001.0,1.421,3.488,1.376,10001.0,1.421,3.52,1.407,10001.0,1.434,9.6,1.407,10001.0,3.057000,3.744000,3.007000
output_gpu,transformer,Fused_Reshape_LessEqual_Sub_LessEqual_LogicalOr_Select_Mul_Maximum_Select_fusion_9635053328753828768_kernel,1001.0,1.808,4.096,1.791,1001.0,1.807,4.159,1.791,10001.0,1.805,4.16,1.791,10001.0,1.805,4.16,1.791,10001.0,1.805,4.192,1.791,10001.0,1.549,2.336,1.535,10001.0,3.390000,4.576000,3.327000
output_gpu,transformer,Fused_Mul_Cast_Reshape_Sub_Mul_split_4308255424198223322_kernel,1001.0,4.761,7.68,4.671,1001.0,2.975,3.648,2.912,10001.0,3.882,6.784,3.743,10001.0,3.882,6.784,3.743,10001.0,3.882,6.752,3.744,10001.0,3.338,3.808,3.231,10001.0,5.608000,6.400000,5.471000
output_gpu,transformer,Fused_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_4581979027189965273_kernel,1001.0,1.5119999999999998,4.192,1.472,1001.0,1.364,1.824,1.3430000000000002,10001.0,1.509,4.256,1.471,10001.0,1.509,4.256,1.471,10001.0,1.508,4.288,1.471,10001.0,1.298,1.632,1.279,10001.0,3.243000,3.745000,3.167000
output_gpu,transformer,Fused_Mul_Mul_Mul_Mul_Add_Mul_Mul_Mul_Add_Mul_Add_Cast_Add_Cast_Mul_Reshape_Mul__more_split_2166374341930194659_kernel,1001.0,96.033,102.622,94.847,1001.0,96.361,103.07,95.103,10001.0,96.709,102.878,95.23,10001.0,96.709,102.878,95.23,10001.0,96.701,102.174,95.294,10001.0,95.823,99.487,94.431,10001.0,100.32199999999999,104.351,98.687
output_gpu,transformer,Fused_Add_Mul_Cast_LessEqual_Cast_Mul_Add_Cast_ReduceSum_split_11514554387134884171_kernel,1001.0,94.433,102.335,93.727,1001.0,94.45,103.358,93.662,10001.0,86.15899999999999,92.862,85.279,10001.0,86.15899999999999,92.862,85.279,10001.0,86.137,92.383,85.214,10001.0,86.233,844.3739999999999,85.247,10001.0,88.68900000000001,93.663,87.423000
output_gpu,transformer,Fused_Mul_Cast_LessEqual_Cast_Mul_Add_Cast_ReduceSum_split_1633386055352906702_kernel,1001.0,90.237,99.199,89.375,1001.0,90.228,97.47,89.406,10001.0,86.006,93.247,85.215,10001.0,86.006,93.247,85.215,10001.0,85.99600000000002,92.67,85.15,10001.0,86.12,890.8679999999999,85.119,10001.0,88.301,94.719,87.390000
output_gpu,transformer,Fused_Mul_Mul_Add_Add_Cast_Add_Mul_Mul_Cast_ReduceSum_split_16692613946467942231_kernel,1001.0,781.7280000000001,1001.811,612.568,1001.0,770.9760000000001,1608.035,623.252,10001.0,96.504,105.021,92.766,10001.0,96.504,105.021,92.766,10001.0,100.304,105.566,96.095,10001.0,100.462,770.71,95.615,10001.0,104.65,110.207,101.023000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_211862368672782341_kernel,1001.0,10.274,12.927,10.208,1001.0,10.274,12.927,10.208,10001.0,2.769,6.207999999999999,2.719,10001.0,6.365,9.28,6.335,10001.0,9.963,13.184,9.919,10001.0,8.545,9.087,8.511,10001.0,11.250000,11.936000,11.199000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_12824154059640471129_kernel,1001.0,357.897,385.435,329.723,1001.0,357.897,385.435,329.723,10001.0,4.313,7.552,4.287,10001.0,43.293,51.711000000000006,42.111,10001.0,2.925,6.016,2.879,10001.0,2.931,5.952,2.88,10001.0,4.942000,5.727000,4.864000
output_gpu,reduction_post_fusion,Fused_Cast_Mul_Mul_ReduceSum_Mul_split_3349199317900906134_kernel,1001.0,6.612,9.024,6.591,1001.0,6.612,9.024,6.591,10001.0,2.47,5.952000000000001,2.431,10001.0,2.47,5.695,2.431,10001.0,2.469,5.728,2.431,10001.0,2.469,3.264,2.431,10001.0,4.130000,5.120000,3.488000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9442880768288371712_kernel,1001.0,18.065,29.791,17.759,1001.0,18.065,29.791,17.759,10001.0,17.225,28.384,16.64,10001.0,17.281000000000002,28.287,16.672,10001.0,17.291,27.807,16.896,10001.0,16.802999999999997,25.12,16.448,10001.0,19.586000,28.767999999999997,18.304000
output_gpu,reduction_post_fusion,Fused_ReduceMean_4742955017844898090_kernel,1001.0,112.59,122.623,111.871,1001.0,112.59,122.623,111.871,10001.0,113.385,128.829,112.222,10001.0,113.755,129.43699999999998,112.254,10001.0,113.621,128.12599999999998,112.222,10001.0,113.617,870.225,112.254,10001.0,118.301,132.223,116.799000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_Sub_Log_Add_Reshape_Sub_Reshape_Mul_split_1086739347885138236_kernel,1001.0,362.829,386.395,330.652,1001.0,362.829,386.395,330.652,10001.0,11.683,16.959,10.848,10001.0,45.623000000000005,52.959,42.783,10001.0,11.497,16.959,10.688,10001.0,11.223,15.296,10.336,10001.0,13.563000,17.024000,12.576000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15456491524569318827_kernel,1001.0,2.1830000000000003,5.343999999999999,2.143,1001.0,2.1830000000000003,5.343999999999999,2.143,10001.0,1.984,9.28,1.951,10001.0,1.305,3.999,1.279,10001.0,1.305,4.063,1.279,10001.0,1.18,1.536,1.151,10001.0,3.059000,3.552000,3.007000
output_gpu,reduction_post_fusion,Fused_Mul_Add_Cast_ReduceMax_split_15797943235040388018_kernel,1001.0,112.863,121.246,111.583,1001.0,112.863,121.246,111.583,10001.0,101.754,121.757,97.182,10001.0,100.89,120.126,97.406,10001.0,102.401,120.958,97.47,10001.0,100.031,795.922,97.342,10001.0,109.711,116.767,108.511000
output_gpu,reduction_post_fusion,Fused_Reshape_Mul_ReduceSum_split_4829910890489653637_kernel,1001.0,28.207,32.8,28.031,1001.0,28.207,32.8,28.031,10001.0,8.57,12.16,8.479,10001.0,7.676,11.104,7.552,10001.0,7.648,10.848,7.488,10001.0,7.672,8.608,7.52,10001.0,10.044000,11.455000,9.888000
output_gpu,reduction_post_fusion,Fused_ReduceMean_383684264054752582_kernel,1001.0,1.476,3.424,1.44,1001.0,1.476,3.424,1.44,10001.0,1.475,3.52,1.439,10001.0,1.475,3.488,1.439,10001.0,1.474,3.52,1.439,10001.0,1.266,1.76,1.247,10001.0,3.109000,3.936000,3.040000
output_gpu,reduction_post_fusion,Fused_ReduceSum_Mul_split_14504185833015688388_kernel,1001.0,2.385,4.8,2.367,1001.0,2.385,4.8,2.367,10001.0,2.095,4.512,2.079,10001.0,2.094,4.5760000000000005,2.079,10001.0,2.094,4.352,2.079,10001.0,1.793,2.432,1.759,10001.0,3.793000,4.705000,3.743000
output_gpu,reduction_post_fusion,Fused_Mul_Sub_ReduceSum_split_7030037449376857834_kernel,1001.0,29.366,34.848,28.768,1001.0,29.366,34.848,28.768,10001.0,27.347,34.816,24.735,10001.0,28.538,34.656,26.047,10001.0,28.332,34.463,25.919,10001.0,25.365,28.671,24.735,10001.0,30.039000,33.728000,29.375000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17082649656160430118_kernel,1001.0,18.317,20.544,18.207,1001.0,18.317,20.544,18.207,10001.0,2.544,6.24,2.496,10001.0,2.544,5.888,2.496,10001.0,2.543,5.824,2.496,10001.0,2.179,3.04,2.143,10001.0,4.599000,5.504000,4.543000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_10403650091473542781_kernel,1001.0,3.749,5.984,3.68,1001.0,3.749,5.984,3.68,10001.0,2.297,6.336,2.271,10001.0,3.661,7.04,3.424,10001.0,2.296,6.048,2.271,10001.0,2.298,6.304,2.271,10001.0,4.037000,5.472000,3.967000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_3024546953100296898_kernel,1001.0,26.727,36.287,25.952,1001.0,26.727,36.287,25.952,10001.0,25.204,37.055,24.703000000000003,10001.0,25.235,37.439,24.703000000000003,10001.0,25.258000000000003,37.823,24.703,10001.0,25.27,33.855000000000004,24.671,10001.0,27.43,38.462999999999994,26.304000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Mul_ReduceSum_split_8557813870137399269_kernel,1001.0,168.06799999999998,173.054,166.174,1001.0,168.06799999999998,173.054,166.174,10001.0,168.89399999999995,176.893,166.141,10001.0,168.976,175.261,165.85299999999998,10001.0,168.85,177.66099999999997,166.077,10001.0,169.269,961.999,167.229,10001.0,169.961,176.671,167.998000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_17369107602907456092_kernel,1001.0,3.963,7.167999999999999,3.935,1001.0,3.963,7.167999999999999,3.935,10001.0,14.31,17.312,14.015,10001.0,4.253,7.007999999999999,4.223,10001.0,4.252,7.2,4.223,10001.0,4.253,4.608,4.223,10001.0,5.803000,6.592000,5.728000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17053086800732851970_kernel,1001.0,1.288,3.968,1.248,1001.0,1.288,3.968,1.248,10001.0,1.525,3.4560000000000004,1.503,10001.0,1.494,4.128,1.408,10001.0,1.524,3.456,1.503,10001.0,1.308,1.824,1.279,10001.0,3.388000,4.064000,3.327000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_12450087900310959907_kernel,1001.0,3.1660000000000004,6.303999999999999,3.135,1001.0,3.1660000000000004,6.303999999999999,3.135,10001.0,1.87,9.312,1.855,10001.0,1.595,4.384,1.567,10001.0,1.596,4.384,1.567,10001.0,1.597,4.768,1.567,10001.0,3.564000,4.129000,3.487000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_17897269356910728285_kernel,1001.0,184.522,198.718,183.678,1001.0,184.522,198.718,183.678,10001.0,184.281,199.004,183.357,10001.0,184.285,198.364,183.388,10001.0,184.296,198.108,183.196,10001.0,184.529,997.486,183.325,10001.0,186.549,197.854,185.374000
output_gpu,reduction_post_fusion,Fused_Neg_Mul_ReduceSum_split_4248836288393418276_kernel,1001.0,2.975,5.312,2.943,1001.0,2.975,5.312,2.943,10001.0,2.24,5.6,2.207,10001.0,2.24,5.343999999999999,2.207,10001.0,2.24,5.344,2.207,10001.0,2.24,5.536,2.207,10001.0,3.939000,4.832000,3.871000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_ReduceSum_split_9548002235260671700_kernel,1001.0,7.356,13.215,7.232,1001.0,7.356,13.215,7.232,10001.0,5.355,9.696,5.247000000000001,10001.0,5.363,9.728,5.247999999999999,10001.0,5.352,9.728,5.216,10001.0,5.361999999999999,10.111,5.247,10001.0,6.979000,8.000000,6.848000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_15349239229028166055_kernel,1001.0,114.127,122.783,112.702,1001.0,114.127,122.783,112.702,10001.0,106.141,123.486,102.334,10001.0,106.664,121.118,103.07,10001.0,106.339,122.526,102.942,10001.0,106.158,120.478,102.974,10001.0,112.94800000000001,129.055,110.143000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9610770192294330038_kernel,1001.0,2.355,4.672,2.335,1001.0,2.355,4.672,2.335,10001.0,1.88,2.656,1.855,10001.0,2.077,4.416,2.047,10001.0,2.077,4.448,2.047,10001.0,1.778,2.528,1.759,10001.0,3.772000,4.768000,3.711000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_11554846572247357113_kernel,1001.0,2.306,5.504,2.271,1001.0,2.306,5.504,2.271,10001.0,1.894,4.96,1.856,10001.0,1.396,4.128,1.375,10001.0,1.397,8.512,1.375,10001.0,1.199,1.536,1.183,10001.0,3.125000,3.648000,3.071000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_Add_split_64190903711288141_kernel,1001.0,5.492000000000001,8.415,5.44,1001.0,5.492000000000001,8.415,5.44,10001.0,5.414,8.64,5.343999999999999,10001.0,5.417000000000001,8.32,5.375,10001.0,5.414,8.32,5.344,10001.0,5.412,5.887,5.343,10001.0,6.993000,9.216000,6.912000
output_gpu,reduction_post_fusion,Fused_ReduceMean_3951761770259855578_kernel,1001.0,121.108,129.695,120.382,1001.0,121.108,129.695,120.382,10001.0,107.184,130.42999999999998,103.806,10001.0,107.472,129.822,103.806,10001.0,105.928,129.59799999999998,103.838,10001.0,104.804,907.44,103.774,10001.0,117.549,124.543,116.959000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13825580897818085376_kernel,1001.0,2.322,5.28,2.303,1001.0,2.322,5.28,2.303,10001.0,1.987,5.183,1.951,10001.0,1.3259999999999998,4.064,1.311,10001.0,1.326,4.16,1.311,10001.0,1.326,1.696,1.311,10001.0,3.055000,3.552000,3.007000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_17620288604821972172_kernel,1001.0,16.625999999999998,19.456,16.511,1001.0,16.625999999999998,19.456,16.511,10001.0,14.414,17.951,12.991,10001.0,4.726,7.776,4.5760000000000005,10001.0,4.727,7.776,4.576,10001.0,4.721,5.28,4.576,10001.0,6.362000,7.040000,6.176000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_5286512164795600183_kernel,1001.0,89.094,93.759,83.551,1001.0,89.094,93.759,83.551,10001.0,21.317,31.423,20.703000000000003,10001.0,21.404,32.319,20.927,10001.0,21.262,31.423,20.736,10001.0,21.207,597.878,20.703,10001.0,23.102000,30.686999999999998,22.528000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_Cast_Mul_split_5156124050122370383_kernel,1001.0,217.017,221.821,216.797,1001.0,217.017,221.821,216.797,10001.0,26.479,37.279,23.712,10001.0,30.456,38.271,28.448,10001.0,30.498,38.303,28.479,10001.0,30.315,36.159,28.447,10001.0,31.948000,38.336,31.616000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_5244294228731598086_kernel,1001.0,2.18,5.376,2.143,1001.0,2.18,5.376,2.143,10001.0,2.0,5.056,1.983,10001.0,1.342,4.032,1.311,10001.0,1.342,4.064,1.311,10001.0,1.156,1.472,1.119,10001.0,3.066000,3.616000,3.007000
output_gpu,reduction_post_fusion,Fused_Sub_Mul_ReduceSum_split_16938012674888718937_kernel,1001.0,49.586000000000006,56.99100000000001,48.831,1001.0,49.586000000000006,56.99100000000001,48.831,10001.0,46.187,50.207,45.215,10001.0,46.35,52.607,45.343,10001.0,46.307,52.607,45.343,10001.0,46.358,835.0260000000001,45.311,10001.0,48.522000000000006,53.439,47.679000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_4029972734867171114_kernel,1001.0,14.914,21.023000000000003,14.111,1001.0,14.914,21.023000000000003,14.111,10001.0,14.612,24.415,14.271,10001.0,14.568,24.863000000000003,14.047,10001.0,14.463,25.855,13.919,10001.0,14.608,23.135,14.239,10001.0,16.914000,24.8,16.288000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_14908857373033220908_kernel,1001.0,36.788,48.288,35.968,1001.0,36.788,48.288,35.968,10001.0,34.687999999999995,45.919,34.144,10001.0,34.624,45.567,34.111,10001.0,34.637,45.695,34.143,10001.0,34.462,42.943000000000005,34.111,10001.0,36.604,45.599000000000004,36.223000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_6780797392119313229_kernel,1001.0,4.925,7.904,4.863,1001.0,4.925,7.904,4.863,10001.0,1.951,5.376,1.919,10001.0,1.887,4.638999999999999,1.855,10001.0,1.951,5.088,1.919,10001.0,1.951,2.56,1.919,10001.0,3.656000,4.415000,3.615000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_702931466894663908_kernel,1001.0,1.222,3.904,1.183,1001.0,1.222,3.904,1.183,10001.0,1.396,3.2960000000000003,1.375,10001.0,1.431,4.16,1.4069999999999998,10001.0,1.396,3.328,1.375,10001.0,1.199,1.664,1.183,10001.0,3.043000,3.807000,2.975000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_8269985678721834591_kernel,1001.0,2.727,5.824,2.687,1001.0,2.727,5.824,2.687,10001.0,2.268,5.792000000000001,2.239,10001.0,2.267,5.664,2.239,10001.0,2.266,5.632,2.239,10001.0,2.267,3.136,2.239,10001.0,3.961000,4.896000,3.903000
output_gpu,reduction_post_fusion,Fused_Reshape_Reshape_ReduceSum_Add_RealDiv_split_2212596202541059681_kernel,1001.0,2.627,5.12,2.592,1001.0,2.627,5.12,2.592,10001.0,2.369,5.024,2.335,10001.0,2.3680000000000003,3.488,2.335,10001.0,2.368,4.992,2.335,10001.0,2.367,3.584,2.335,10001.0,4.055000,5.312000,3.999000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_13933643594187294147_kernel,1001.0,7.556,10.4,7.487,1001.0,7.556,10.4,7.487,10001.0,3.0860000000000003,8.576,3.039,10001.0,3.083,6.688,3.039,10001.0,3.085,6.688,3.039,10001.0,2.643,3.68,2.591,10001.0,5.157000,6.304000,5.087000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_7563720092882377888_kernel,1001.0,6.407,8.736,6.24,1001.0,6.407,8.736,6.24,10001.0,2.676,6.464,2.655,10001.0,2.675,5.919,2.624,10001.0,2.675,5.92,2.655,10001.0,2.677,11.584,2.655,10001.0,4.514000,5.536000,3.937000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_5786251060074629964_kernel,1001.0,28.209,33.152,28.031,1001.0,28.209,33.152,28.031,10001.0,8.583,12.16,8.479,10001.0,8.126,11.424,7.871,10001.0,8.092,11.392,7.872,10001.0,8.107,11.679,7.808,10001.0,9.574000,10.208000,9.312000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_14872927465446494485_kernel,1001.0,2.71,5.92,2.656,1001.0,2.71,5.92,2.656,10001.0,2.23,5.696000000000001,2.207,10001.0,2.229,5.407,2.207,10001.0,2.229,5.408,2.207,10001.0,1.908,2.624,1.887,10001.0,3.906000,4.832000,3.839000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17160201757039515550_kernel,1001.0,64.811,75.775,63.743,1001.0,64.811,75.775,63.743,10001.0,63.848,75.071,63.102,10001.0,63.83800000000001,75.13499999999998,63.102,10001.0,63.847,75.423,63.039,10001.0,63.735,72.799,63.071,10001.0,66.423,76.127,65.343000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_6875858508543751623_kernel,1001.0,2.166,4.6080000000000005,2.143,1001.0,2.166,4.6080000000000005,2.143,10001.0,2.105,4.512,2.079,10001.0,2.104,4.48,2.079,10001.0,2.104,4.48,2.079,10001.0,1.803,2.56,1.76,10001.0,3.822000,4.832000,3.775000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_4931910940408955701_kernel,1001.0,6.445,8.896,6.431,1001.0,6.445,8.896,6.431,10001.0,2.495,6.08,2.463,10001.0,2.494,5.824,2.463,10001.0,2.494,5.856,2.463,10001.0,2.494,3.424,2.463,10001.0,4.372000,5.632000,4.319000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_4138107994226861267_kernel,1001.0,3.044,6.08,3.007,1001.0,3.044,6.08,3.007,10001.0,2.773,6.207999999999999,2.72,10001.0,2.774,6.016,2.72,10001.0,2.775,6.048,2.72,10001.0,2.513,3.264,2.464,10001.0,4.447000,5.472000,4.383000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_18399109641449281482_kernel,1001.0,176.403,186.942,175.454,1001.0,176.403,186.942,175.454,10001.0,175.05100000000004,186.525,174.55700000000002,10001.0,175.03900000000002,186.78,174.55599999999998,10001.0,175.042,186.94,174.557,10001.0,175.287,897.648,174.589,10001.0,177.41299999999998,186.81400000000002,176.990000
output_gpu,reduction_post_fusion,Fused_Add_Cast_ReduceMax_split_1742939435773236619_kernel,1001.0,6.9270000000000005,9.824,6.879,1001.0,6.9270000000000005,9.824,6.879,10001.0,3.526,7.04,3.487,10001.0,3.525,6.624,3.487,10001.0,3.526,6.752,3.487,10001.0,3.526,7.456,3.487,10001.0,5.599000,6.432000,5.535000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_11133258566484799689_kernel,1001.0,2.496,5.792000000000001,2.463,1001.0,2.496,5.792000000000001,2.463,10001.0,2.442,6.303999999999999,2.399,10001.0,2.441,6.016,2.399,10001.0,2.44,6.048,2.399,10001.0,2.444,5.92,2.399,10001.0,4.138000,5.152000,4.095000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_9487618319804607812_kernel,1001.0,1.849,3.871,1.823,1001.0,1.849,3.871,1.823,10001.0,1.848,8.448,1.823,10001.0,1.847,3.904,1.823,10001.0,1.847,3.968,1.823,10001.0,1.847,2.528,1.823,10001.0,3.732000,4.480000,3.679000
output_gpu,reduction_post_fusion,Fused_Abs_ReduceMax_fusion_17527458481536417723_kernel,1001.0,2.4290000000000003,4.736000000000001,2.399,1001.0,2.4290000000000003,4.736000000000001,2.399,10001.0,2.472,4.8,2.431,10001.0,2.47,4.768,2.431,10001.0,2.47,4.832,2.431,10001.0,2.472,4.8,2.431,10001.0,4.200000,5.152000,4.127000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_15291999133953476061_kernel,1001.0,219.648,230.845,218.942,1001.0,219.648,230.845,218.942,10001.0,219.343,228.252,218.428,10001.0,219.377,230.588,218.492,10001.0,219.374,231.164,218.524,10001.0,219.37,231.323,218.556,10001.0,221.505,232.222,220.158000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_9843924338213189397_kernel,1001.0,63.968,66.87899999999999,63.839,1001.0,63.968,66.87899999999999,63.839,10001.0,6.409,11.904000000000002,6.303,10001.0,20.918000000000003,27.52,19.743,10001.0,6.898,12.0,6.591,10001.0,6.898,8.16,6.592,10001.0,9.307000,11.168000,8.896000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13799066988795076930_kernel,1001.0,5.026,8.064,4.9910000000000005,1001.0,5.026,8.064,4.9910000000000005,10001.0,2.508,5.568,2.463,10001.0,1.82,4.736000000000001,1.791,10001.0,1.818,4.704,1.791,10001.0,1.821,5.151,1.791,10001.0,3.578000,4.160000,3.519000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_16889214658628119107_kernel,1001.0,24.851,35.648,24.287,1001.0,24.851,35.648,24.287,10001.0,24.127,35.935,23.615,10001.0,24.089,36.511,23.584,10001.0,24.056,34.975,23.584,10001.0,24.088,33.503,23.615,10001.0,26.055000,35.231,25.664000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_15424511383944843442_kernel,1001.0,320.861,344.18800000000005,290.237,1001.0,320.861,344.18800000000005,290.237,10001.0,39.545,50.367,38.719,10001.0,49.654,61.406000000000006,46.783,10001.0,39.241,50.111,38.623,10001.0,39.384,938.128,38.688,10001.0,41.369,51.2,40.832000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_590200782861150805_kernel,1001.0,6.728,10.08,6.687,1001.0,6.728,10.08,6.687,10001.0,1.979,5.472,1.951,10001.0,2.263,5.12,2.239,10001.0,1.942,5.312,1.888,10001.0,1.943,2.784,1.824,10001.0,3.638000,4.576000,3.583000
output_gpu,reduction_post_fusion,Fused_ReduceSum_Mul_split_847463926504720470_kernel,1001.0,18.024,30.24,17.759,1001.0,18.024,30.24,17.759,10001.0,18.1,28.479,17.215,10001.0,18.044,28.479,17.247,10001.0,17.930999999999994,28.255,17.215,10001.0,18.145000000000003,26.495,17.247,10001.0,19.947000,29.183,19.424000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_883961644439154119_kernel,1001.0,4.838,7.712000000000001,4.799,1001.0,4.838,7.712000000000001,4.799,10001.0,2.85,6.399,2.815,10001.0,2.852,6.079,2.815,10001.0,2.852,6.111,2.815,10001.0,2.446,3.135,2.399,10001.0,4.524000,5.601000,4.447000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_3830285062720036199_kernel,1001.0,11.157,14.207,10.976,1001.0,11.157,14.207,10.976,10001.0,3.604,7.231,3.52,10001.0,3.612,6.912000000000001,3.551,10001.0,3.609,6.944,3.551,10001.0,3.616,7.008,3.551,10001.0,5.241000,6.304000,5.183000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_13097791322184337320_kernel,1001.0,1.288,3.968,1.248,1001.0,1.288,3.968,1.248,10001.0,1.525,3.4560000000000004,1.503,10001.0,1.494,4.128,1.471,10001.0,1.524,3.456,1.503,10001.0,1.523,3.616,1.503,10001.0,3.320000,3.904000,3.263000
output_gpu,reduction_post_fusion,Fused_ReduceMean_6014603555853229691_kernel,1001.0,229.767,241.629,216.733,1001.0,229.767,241.629,216.733,10001.0,59.206,74.878,52.575,10001.0,41.355,50.01499999999999,39.68,10001.0,41.352,52.319,39.743,10001.0,40.317,48.575,39.647,10001.0,43.525,51.839999999999996,42.879000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9675091310831314754_kernel,1001.0,262.744,273.564,261.693,1001.0,262.744,273.564,261.693,10001.0,261.32300000000004,273.243,260.763,10001.0,261.341,273.082,260.763,10001.0,261.315,273.371,260.795,10001.0,261.547,970.479,260.795,10001.0,263.90000000000003,273.534,263.326000
output_gpu,reduction_post_fusion,Fused_ReduceMean_16458301695836426887_kernel,1001.0,166.80700000000002,176.606,152.638,1001.0,166.80700000000002,176.606,152.638,10001.0,148.142,176.637,144.41299999999998,10001.0,146.92600000000002,176.956,144.445,10001.0,147.48600000000005,176.253,144.477,10001.0,147.279,177.08499999999998,144.445,10001.0,164.166,186.238,162.014000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_6019367509159564389_kernel,1001.0,8.032,10.976,7.968,1001.0,8.032,10.976,7.968,10001.0,2.424,5.824,2.3680000000000003,10001.0,2.426,5.631,2.399,10001.0,2.425,5.567,2.399,10001.0,2.427,5.76,2.368,10001.0,4.433000,5.312000,4.351000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_2560788304788358183_kernel,1001.0,96.263,98.911,96.158,1001.0,96.263,98.911,96.158,10001.0,2.17,5.696000000000001,2.143,10001.0,2.169,5.408,2.143,10001.0,2.169,5.376,2.143,10001.0,2.171,9.504,2.048,10001.0,4.176000,5.184000,4.095000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_6634546725711362842_kernel,1001.0,113.966,126.047,113.182,1001.0,113.966,126.047,113.182,10001.0,113.667,126.685,112.798,10001.0,113.675,127.07,112.766,10001.0,113.697,124.894,112.798,10001.0,113.833,841.745,112.766,10001.0,115.878,127.743,114.911000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_4124937907018987585_kernel,1001.0,88.47,93.726,83.551,1001.0,88.47,93.726,83.551,10001.0,21.495,32.160000000000004,20.799,10001.0,21.808000000000003,32.031,21.248,10001.0,21.154,31.52,20.767,10001.0,21.153,29.055,20.672,10001.0,23.468000,31.488000000000003,22.912000
output_gpu,reduction_post_fusion,Fused_Reshape_Mul_ReduceSum_split_16702906179051960568_kernel,1001.0,39.816,42.4,39.679,1001.0,39.816,42.4,39.679,10001.0,3.045,7.136,2.943,10001.0,3.047,6.976,2.943,10001.0,3.047,6.816,2.943,10001.0,3.052,4.448,2.944,10001.0,4.723000,5.952000,4.607000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9236644468982006718_kernel,1001.0,19.76,28.8,19.392,1001.0,19.76,28.8,19.392,10001.0,19.271,30.911,18.752,10001.0,19.257,31.327,18.751,10001.0,19.192,31.136,18.688,10001.0,18.901,28.448,18.591,10001.0,21.424000,31.136,20.928000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_10845074449741576939_kernel,1001.0,86.594,95.902,85.311,1001.0,86.594,95.902,85.311,10001.0,80.768,96.542,78.014,10001.0,81.107,94.11,77.79,10001.0,80.89200000000002,96.415,77.566,10001.0,79.61699999999999,777.427,77.694,10001.0,85.42,93.599,83.423000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_2397920035714093970_kernel,1001.0,3.749,5.952000000000001,3.679,1001.0,3.749,5.952000000000001,3.679,10001.0,2.155,5.5360000000000005,2.111,10001.0,2.155,5.28,2.112,10001.0,2.155,5.28,2.111,10001.0,2.156,5.247,2.112,10001.0,4.157000,5.120000,4.095000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_297704009852807951_kernel,1001.0,133.341,144.382,132.767,1001.0,133.341,144.382,132.767,10001.0,132.889,145.118,132.285,10001.0,132.89,144.66899999999995,132.253,10001.0,132.912,145.69299999999996,132.158,10001.0,132.913,146.206,132.222,10001.0,135.212,146.33499999999998,134.399000
output_gpu,reduction_post_fusion,Fused_Reshape_Mul_ReduceSum_split_14582668912295876395_kernel,1001.0,2.257,4.704,2.239,1001.0,2.257,4.704,2.239,10001.0,2.12,5.5360000000000005,2.079,10001.0,2.119,5.44,2.079,10001.0,2.119,5.28,2.08,10001.0,2.119,2.912,2.079,10001.0,3.838000,4.768000,3.775000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_1981296571397398758_kernel,1001.0,2.692,4.96,2.656,1001.0,2.692,4.96,2.656,10001.0,2.147,5.696000000000001,2.111,10001.0,2.147,5.472,2.111,10001.0,2.147,5.248,2.111,10001.0,2.147,5.504,2.111,10001.0,3.845000,4.705000,3.775000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17158479222523381165_kernel,1001.0,10.074,13.12,9.984,1001.0,10.074,13.12,9.984,10001.0,2.015,5.664,1.983,10001.0,3.156,5.984,2.944,10001.0,2.015,5.504,1.983,10001.0,1.73,2.688,1.695,10001.0,4.060000,5.024000,3.999000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13190158174088614872_kernel,1001.0,2.197,5.375,2.175,1001.0,2.197,5.375,2.175,10001.0,2.032,9.376,2.015,10001.0,1.346,4.096,1.311,10001.0,1.346,4.063,1.311,10001.0,1.345,1.728,1.311,10001.0,3.065000,3.616000,3.007000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_18161176003913693487_kernel,1001.0,8.851,11.904000000000002,8.831,1001.0,8.851,11.904000000000002,8.831,10001.0,2.511,6.72,2.464,10001.0,3.299,6.4,3.263,10001.0,2.511,6.56,2.464,10001.0,2.159,7.808,2.112,10001.0,4.207000,5.760000,4.127000
output_gpu,reduction_post_fusion,Fused_Sub_Mul_ReduceSum_split_9576479946113568716_kernel,1001.0,49.575,56.928,48.863,1001.0,49.575,56.928,48.863,10001.0,46.353,53.343,45.375,10001.0,46.304,53.119,45.343,10001.0,46.266000000000005,52.767,45.183,10001.0,46.363,50.335,45.407,10001.0,48.525999999999996,52.831999999999994,47.648000
output_gpu,reduction_post_fusion,Fused_ReduceMean_Reshape_Dropout_fusion_13469292139522287431_kernel,1001.0,246.843,276.733,234.205,1001.0,246.843,276.733,234.205,10001.0,88.525,111.23,70.751,10001.0,49.133,60.255,47.423,10001.0,48.93700000000001,59.742,47.423,10001.0,48.124,55.711,47.423,10001.0,51.617000000000004,58.943000000000005,50.912000
output_gpu,reduction_post_fusion,Fused_ReduceMean_4318638107753239166_kernel,1001.0,69.155,81.98299999999999,68.671,1001.0,69.155,81.98299999999999,68.671,10001.0,69.81099999999999,82.111,68.671,10001.0,70.054,82.495,68.639,10001.0,70.10900000000001,82.62299999999999,68.638,10001.0,69.194,802.2249999999999,68.671,10001.0,72.857,81.791,72.224000
output_gpu,reduction_post_fusion,Fused_Reshape_ReduceSum_Add_RealDiv_split_6710312135617753113_kernel,1001.0,2.408,4.768,2.3680000000000003,1001.0,2.408,4.768,2.3680000000000003,10001.0,2.138,4.5760000000000005,2.111,10001.0,2.138,4.448,2.111,10001.0,2.138,4.48,2.111,10001.0,1.936,2.72,1.919,10001.0,3.815000,4.832000,3.775000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_18167272354202182945_kernel,1001.0,5.252999999999999,7.936,5.152,1001.0,5.252999999999999,7.936,5.152,10001.0,2.8360000000000003,6.272,2.783,10001.0,6.306,9.248,6.271,10001.0,9.986,12.927,9.951,10001.0,8.562,9.088,8.543,10001.0,11.207000,11.968000,11.135000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_6803180149090166107_kernel,1001.0,348.01,358.81199999999995,347.195,1001.0,348.01,358.81199999999995,347.195,10001.0,347.094,359.641,346.361,10001.0,347.098,358.425,346.329,10001.0,347.099,359.065,346.329,10001.0,347.53000000000003,1133.867,346.394,10001.0,349.434,359.933,348.701000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_7047758847107401944_kernel,1001.0,66.315,71.487,64.831,1001.0,66.315,71.487,64.831,10001.0,16.505,26.144,16.032,10001.0,16.897000000000002,25.952,16.415,10001.0,16.585,25.92,16.032,10001.0,16.381,23.776,15.871,10001.0,18.465000,26.176000000000002,17.984000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_8872950071756149136_kernel,1001.0,17.468,24.768,16.928,1001.0,17.468,24.768,16.928,10001.0,16.92,28.0,16.575,10001.0,16.917,28.19100000000001,16.576,10001.0,16.917,27.712,16.575,10001.0,16.59,26.048,16.255,10001.0,18.853000,27.424,18.496000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Cast_ReduceSum_split_10543761844475784408_kernel,1001.0,246.851,259.101,231.101,1001.0,246.851,259.101,231.101,10001.0,47.863,54.399,45.247,10001.0,38.555,45.471,36.255,10001.0,51.333,55.999,48.415,10001.0,50.99,58.047,48.319,10001.0,53.783000,56.896000,50.880000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_13893736644811418886_kernel,1001.0,2.256,4.704,2.239,1001.0,2.256,4.704,2.239,10001.0,2.12,5.535,2.079,10001.0,2.119,5.312,2.08,10001.0,2.119,5.28,2.079,10001.0,2.119,2.88,2.079,10001.0,3.841000,4.736000,3.776000
output_gpu,reduction_post_fusion,Fused_ReduceMean_4445296748968516267_kernel,1001.0,67.225,78.175,66.688,1001.0,67.225,78.175,66.688,10001.0,67.60199999999999,79.774,66.334,10001.0,67.172,79.743,66.271,10001.0,66.994,79.167,66.303,10001.0,67.56700000000001,805.1700000000001,66.303,10001.0,70.023,79.48700000000001,69.375000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_Sqrt_Reshape_Greater_Reshape_Select_split_9040850360720684697_kernel,1001.0,1.454,3.648,1.408,1001.0,1.454,3.648,1.408,10001.0,1.4509999999999998,3.52,1.4069999999999998,10001.0,1.4509999999999998,3.488,1.4069999999999998,10001.0,1.451,3.551,1.407,10001.0,1.451,1.952,1.407,10001.0,3.330000,4.064000,3.263000
output_gpu,reduction_post_fusion,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_17865465815588720940_kernel,1001.0,18.544,27.616,17.984,1001.0,18.544,27.616,17.984,10001.0,24.048,31.967,21.727,10001.0,23.569000000000003,33.823,21.663,10001.0,23.596,33.311,21.855,10001.0,22.118,845.202,21.055,10001.0,26.393000,34.527,24.416000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_5019236563776444250_kernel,1001.0,219.569,242.557,208.701,1001.0,219.569,242.557,208.701,10001.0,44.389,50.495,40.799,10001.0,45.396,51.135,40.863,10001.0,45.266,50.911,40.703,10001.0,45.69,50.495,40.799,10001.0,49.049000,53.663000,47.071000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_12090643136724717713_kernel,1001.0,227.628,240.253,219.581,1001.0,227.628,240.253,219.581,10001.0,90.644,102.462,89.855,10001.0,90.574,102.078,89.823,10001.0,90.558,102.078,89.822,10001.0,90.656,797.874,89.79,10001.0,93.16199999999999,102.11099999999999,92.319000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_17440547290800295862_kernel,1001.0,132.94899999999998,146.559,132.095,1001.0,132.94899999999998,146.559,132.095,10001.0,132.741,146.429,131.678,10001.0,132.744,147.037,131.71,10001.0,132.70299999999995,146.813,131.741,10001.0,132.92100000000002,957.327,131.741,10001.0,134.822,147.39,133.854000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_Mul_Sub_Cast_Cast_ReduceSum_Mul_split_11243362267707836255_kernel,1001.0,5.341,8.16,5.311,1001.0,5.341,8.16,5.311,10001.0,5.341,7.584,5.311,10001.0,5.339,7.616,5.311,10001.0,5.338,7.583,5.311,10001.0,5.34,6.176,5.311,10001.0,7.200000,7.967000,7.135000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_13911676550351113984_kernel,1001.0,158.059,172.09400000000002,157.18200000000002,1001.0,158.059,172.09400000000002,157.18200000000002,10001.0,157.81,172.18800000000005,156.765,10001.0,157.819,172.285,156.893,10001.0,157.809,171.805,156.925,10001.0,158.046,1047.246,156.829,10001.0,159.888,171.99800000000002,158.942000
output_gpu,reduction_post_fusion,Fused_Cast_Mul_Mul_ReduceSum_Mul_split_10027531740183302338_kernel,1001.0,6.645,9.216,6.622999999999999,1001.0,6.645,9.216,6.622999999999999,10001.0,2.461,4.864,2.431,10001.0,2.46,4.928,2.431,10001.0,2.46,4.832,2.431,10001.0,2.46,3.392,2.431,10001.0,4.104000,5.119000,4.063000
output_gpu,reduction_post_fusion,Fused_ReduceMean_7589471855126649108_kernel,1001.0,103.792,112.798,103.135,1001.0,103.792,112.798,103.135,10001.0,91.095,113.054,88.99,10001.0,93.456,885.5509999999998,88.958,10001.0,93.332,112.574,88.958,10001.0,92.747,787.442,88.99,10001.0,101.13,108.095,100.575000
output_gpu,reduction_post_fusion,Fused_Add_Cast_ReduceMax_split_16727254203272830108_kernel,1001.0,1.254,4.096,1.216,1001.0,1.254,4.096,1.216,10001.0,1.433,3.488,1.4069999999999998,10001.0,1.471,4.224,1.439,10001.0,1.433,3.456,1.407,10001.0,1.433,1.984,1.407,10001.0,3.293000,4.031000,3.231000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_6643930332904336521_kernel,1001.0,39.618,49.408,37.983,1001.0,39.618,49.408,37.983,10001.0,36.531,48.799,35.903,10001.0,36.413,48.70399999999999,35.871,10001.0,36.50400000000001,48.672,35.871,10001.0,36.351,45.792,35.871,10001.0,38.577,48.607,38.111000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_14089211308293203352_kernel,1001.0,20.626,30.336,20.096,1001.0,20.626,30.336,20.096,10001.0,18.903,29.343000000000004,18.4,10001.0,18.916,29.503,18.496,10001.0,18.827,29.408,18.368,10001.0,18.734,27.84,18.239,10001.0,21.062000,30.816,20.384000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Mul_Mul_ReduceSum_split_14684688370529604183_kernel,1001.0,7.957999999999999,11.903,7.903,1001.0,7.957999999999999,11.903,7.903,10001.0,6.146,6.528,5.9510000000000005,10001.0,5.371,8.448,5.247000000000001,10001.0,5.319,8.224,5.183,10001.0,5.321,8.32,5.183,10001.0,6.889000,7.584000,6.720000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_999075897968895260_kernel,1001.0,3.574,7.616,3.519,1001.0,3.574,7.616,3.519,10001.0,3.4930000000000003,7.776,3.455,10001.0,3.497,7.582999999999999,3.455,10001.0,3.498,7.648,3.455,10001.0,3.493,5.184,3.455,10001.0,5.138000,6.496000,4.192000
output_gpu,reduction_post_fusion,Fused_Add_ReduceMax_split_17997934085059918063_kernel,1001.0,7.77,10.656,7.711,1001.0,7.77,10.656,7.711,10001.0,4.189,7.648,4.096,10001.0,4.181,7.296,4.095,10001.0,4.183,7.327,4.096,10001.0,4.185,7.296,4.095,10001.0,5.740000,6.720000,5.632000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_6193936635329247067_kernel,1001.0,8.236,11.04,8.192,1001.0,8.236,11.04,8.192,10001.0,3.198,6.272,3.167,10001.0,2.52,5.759,2.495,10001.0,2.521,5.888,2.464,10001.0,2.523,3.328,2.495,10001.0,4.668000,5.280000,4.576000
output_gpu,reduction_post_fusion,Fused_Add_RealDiv_ReduceSum_split_13169468148967519997_kernel,1001.0,6.084,8.16,6.047000000000001,1001.0,6.084,8.16,6.047000000000001,10001.0,6.085,12.864,6.047000000000001,10001.0,6.081,8.224,6.047000000000001,10001.0,6.081,8.352,6.047,10001.0,6.081,6.847,6.047,10001.0,7.414000,8.288000,7.359000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_7030311586437121275_kernel,1001.0,6.58,9.056,6.559,1001.0,6.58,9.056,6.559,10001.0,2.464,4.832,2.431,10001.0,2.463,4.832,2.431,10001.0,2.463,4.896,2.431,10001.0,2.462,3.328,2.431,10001.0,4.247000,5.280000,4.191000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_10404330036365636525_kernel,1001.0,15.022,17.76,14.976,1001.0,15.022,17.76,14.976,10001.0,2.7910000000000004,5.92,2.72,10001.0,3.468,6.72,3.423,10001.0,2.894,6.207,2.847,10001.0,2.892,3.68,2.847,10001.0,4.702000,5.440000,4.639000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_4894321128908295401_kernel,1001.0,130.282,141.086,129.47,1001.0,130.282,141.086,129.47,10001.0,129.878,142.845,129.149,10001.0,129.878,141.694,129.085,10001.0,129.86700000000002,142.237,129.181,10001.0,129.998,1100.716,129.15,10001.0,132.007,141.119,131.263000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_7185430665996753341_kernel,1001.0,236.843,242.333,218.205,1001.0,236.843,242.333,218.205,10001.0,2.889,4.096,2.847,10001.0,3.189,6.784,3.135,10001.0,3.19,7.039,3.135,10001.0,2.743,3.904,2.688,10001.0,4.901000,6.048000,4.800000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_8649327892057153246_kernel,1001.0,34.078,36.703,33.983000000000004,1001.0,34.078,36.703,33.983000000000004,10001.0,3.474,6.816,3.264,10001.0,10.937,14.111,9.727,10001.0,3.734,7.455999999999999,3.679,10001.0,3.744,4.927,3.648,10001.0,5.535000,6.720000,5.439000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_1442034214599389676_kernel,1001.0,144.244,150.20600000000002,144.158,1001.0,144.244,150.20600000000002,144.158,10001.0,109.936,126.493,101.214,10001.0,52.951,65.82300000000001,49.823,10001.0,52.542,65.054,49.663,10001.0,52.86600000000001,1037.23,49.567,10001.0,56.002,64.03200000000001,54.815000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_18187264574057599824_kernel,1001.0,174.66299999999998,197.822,173.054,1001.0,174.66299999999998,197.822,173.054,10001.0,21.237,32.254999999999995,20.319000000000003,10001.0,27.478,35.166999999999994,25.311,10001.0,20.858,31.84,20.319,10001.0,20.736,684.564,20.128,10001.0,22.943000,31.615,21.760000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_9005003959610787421_kernel,1001.0,2.843,5.856,2.815,1001.0,2.843,5.856,2.815,10001.0,1.666,4.704,1.631,10001.0,1.546,4.288,1.504,10001.0,1.547,4.32,1.535,10001.0,1.547,1.888,1.535,10001.0,3.284000,3.776000,3.200000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_3588669061737312733_kernel,1001.0,22.212,32.415,21.632,1001.0,22.212,32.415,21.632,10001.0,21.768,33.504,21.216,10001.0,21.781,32.768,21.247,10001.0,21.722,33.216,21.247,10001.0,21.569000000000003,30.304,21.183,10001.0,23.730000,33.056000000000004,23.327000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_12188880778181902931_kernel,1001.0,341.01,353.56300000000005,329.468,1001.0,341.01,353.56300000000005,329.468,10001.0,248.004,261.307,247.323,10001.0,248.056,261.69100000000003,247.26,10001.0,248.064,261.403,247.227,10001.0,248.61,1059.342,247.228,10001.0,251.919,260.958,250.685000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_3366192580175348419_kernel,1001.0,238.882,243.005,218.717,1001.0,238.882,243.005,218.717,10001.0,2.696,6.207999999999999,2.655,10001.0,2.7030000000000003,5.888,2.655,10001.0,2.698,5.856,2.655,10001.0,2.447,3.296,2.399,10001.0,4.391000,5.312000,4.319000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_13074136462214727637_kernel,1001.0,2.517,5.728,2.495,1001.0,2.517,5.728,2.495,10001.0,2.531,5.696000000000001,2.495,10001.0,1.365,4.127,1.3430000000000002,10001.0,1.572,4.352,1.535,10001.0,1.572,4.352,1.535,10001.0,3.318000,3.808000,3.263000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_14503017584508981378_kernel,1001.0,9.4,11.775,9.279,1001.0,9.4,11.775,9.279,10001.0,2.7310000000000003,6.176,2.687,10001.0,2.7310000000000003,6.176,2.687,10001.0,2.73,5.92,2.687,10001.0,2.341,3.136,2.303,10001.0,4.641000,5.792000,4.575000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_Mul_Sub_ReduceSum_Mul_split_3131596087248341812_kernel,1001.0,46.08,48.927,45.75899999999999,1001.0,46.08,48.927,45.75899999999999,10001.0,16.663,21.12,16.511,10001.0,6.939,10.4,6.5920000000000005,10001.0,16.669,20.895,16.383,10001.0,16.11,17.856,14.88,10001.0,17.870000,18.464000,17.567000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_14672270188989924623_kernel,1001.0,11.133,14.4,10.88,1001.0,11.133,14.4,10.88,10001.0,2.9760000000000004,6.656000000000001,2.911,10001.0,2.979,6.335,2.912,10001.0,2.975,6.367,2.912,10001.0,2.554,3.52,2.496,10001.0,4.696000,5.440000,4.608000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_2064422502572383303_kernel,1001.0,2.3,5.5360000000000005,2.271,1001.0,2.3,5.5360000000000005,2.271,10001.0,1.957,4.928,1.92,10001.0,1.324,4.064,1.311,10001.0,1.324,4.096,1.311,10001.0,1.324,4.095,1.311,10001.0,3.039000,4.224000,2.975000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15289644442007799394_kernel,1001.0,12.451,18.944000000000003,12.0,1001.0,12.451,18.944000000000003,12.0,10001.0,10.403,13.728,9.984,10001.0,10.380999999999998,13.216,9.663,10001.0,10.404,13.568,9.983,10001.0,10.402,13.76,9.887,10001.0,12.457000,13.535,11.616000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_10565266600396714336_kernel,1001.0,7.487999999999999,10.208,7.455,1001.0,7.487999999999999,10.208,7.455,10001.0,2.91,6.72,2.879,10001.0,2.912,6.367999999999999,2.879,10001.0,2.91,6.88,2.879,10001.0,2.5,3.232,2.463,10001.0,4.577000,5.600000,4.511000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_3129356159301994235_kernel,1001.0,5.882000000000001,8.416,5.823,1001.0,5.882000000000001,8.416,5.823,10001.0,2.704,6.367999999999999,2.656,10001.0,2.7030000000000003,5.92,2.655,10001.0,2.703,5.887,2.656,10001.0,2.703,3.552,2.656,10001.0,4.637000,5.695000,4.575000
output_gpu,reduction_post_fusion,Fused_Reshape_Sub_Exp_ReduceSum_split_4086276757985005905_kernel,1001.0,7.582999999999999,10.592,7.519,1001.0,7.582999999999999,10.592,7.519,10001.0,3.11,7.2,3.071,10001.0,3.112,6.912000000000001,3.071,10001.0,3.111,6.719,3.071,10001.0,3.116,6.656,3.071,10001.0,4.808000,5.888000,4.735000
output_gpu,reduction_post_fusion,Fused_RealDiv_ReduceSum_Mul_Add_split_12597223162398153097_kernel,1001.0,2.365,4.704,2.335,1001.0,2.365,4.704,2.335,10001.0,2.04,4.064,2.015,10001.0,2.04,4.096,2.015,10001.0,2.039,4.064,2.015,10001.0,2.039,4.096,2.015,10001.0,3.556000,4.320000,3.487000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_6265248061618449966_kernel,1001.0,59.06399999999999,72.79899999999999,58.591,1001.0,59.06399999999999,72.79899999999999,58.591,10001.0,58.791,795.057,57.983,10001.0,58.744,69.40599999999999,57.983,10001.0,58.743,70.911,57.918,10001.0,58.801,829.233,57.919,10001.0,60.792,72.287,60.192000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_1279228101663680291_kernel,1001.0,166.14,175.166,153.75799999999998,1001.0,166.14,175.166,153.75799999999998,10001.0,145.787,161.501,131.197,10001.0,61.07,74.11,57.887,10001.0,60.983,74.622,57.631,10001.0,60.938,978.319,57.599,10001.0,65.41,76.767,63.263000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13706748473241630349_kernel,1001.0,1.645,4.416,1.6,1001.0,1.645,4.416,1.6,10001.0,1.64,3.552,1.599,10001.0,1.64,3.712,1.599,10001.0,1.64,3.552,1.599,10001.0,1.64,3.52,1.599,10001.0,3.281000,3.808000,3.200000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9845961367510078313_kernel,1001.0,229.394,242.429,223.709,1001.0,229.394,242.429,223.709,10001.0,3.182,7.104,3.135,10001.0,3.1860000000000004,6.7200000000000015,3.135,10001.0,3.185,6.688,3.135,10001.0,3.195,7.072,3.136,10001.0,4.917000,6.112000,4.831000
output_gpu,reduction_post_fusion,Fused_Reshape_Reshape_Cast_ReduceSum_split_4757686232814340433_kernel,1001.0,2.387,5.28,2.336,1001.0,2.387,5.28,2.336,10001.0,1.732,4.992,1.695,10001.0,1.57,4.544,1.535,10001.0,1.57,4.544,1.535,10001.0,1.57,4.544,1.535,10001.0,3.311000,4.192000,3.232000
output_gpu,reduction_post_fusion,Fused_ReduceMean_17711345247109223154_kernel,1001.0,143.329,147.23,143.006,1001.0,143.329,147.23,143.006,10001.0,32.45,46.07899999999999,30.015,10001.0,26.211,36.511,24.799,10001.0,25.88,36.671,24.704,10001.0,25.734,33.887,24.415,10001.0,27.878,35.936,27.201000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_Log_split_13410944084942143323_kernel,1001.0,1.361,4.096,1.3430000000000002,1001.0,1.361,4.096,1.3430000000000002,10001.0,1.547,3.487,1.535,10001.0,1.568,4.256,1.535,10001.0,1.547,3.488,1.535,10001.0,1.399,1.92,1.375,10001.0,3.171000,3.745000,3.103000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_6727022908922420278_kernel,1001.0,4.933,8.16,4.895,1001.0,4.933,8.16,4.895,10001.0,14.286,17.247,13.984000000000002,10001.0,4.334,7.2,4.287,10001.0,4.336,7.2,4.287,10001.0,4.337,4.832,4.287,10001.0,5.949000,6.624000,5.887000
output_gpu,reduction_post_fusion,Fused_Cast_Cast_ReduceSum_split_10289748916283163852_kernel,1001.0,2.843,6.08,2.815,1001.0,2.843,6.08,2.815,10001.0,13.612,16.512,13.568,10001.0,4.004,6.784,3.967,10001.0,4.004,6.783,3.967,10001.0,4.004,4.384,3.968,10001.0,5.610000,6.112000,5.535000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_4739669179109504223_kernel,1001.0,5.46,8.288,5.407,1001.0,5.46,8.288,5.407,10001.0,2.29,5.343999999999999,2.24,10001.0,1.921,4.96,1.887,10001.0,1.921,4.864,1.887,10001.0,1.738,2.336,1.696,10001.0,3.651000,4.256000,3.583000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_98575232153399791_kernel,1001.0,3.455,6.4,3.423,1001.0,3.455,6.4,3.423,10001.0,2.225,5.632000000000001,2.207,10001.0,6.142,8.96,6.111000000000001,10001.0,6.142,8.96,6.111,10001.0,6.143,6.56,6.111,10001.0,7.743000,8.256000,7.711000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_16545097466616768443_kernel,1001.0,2.1630000000000003,5.343999999999999,2.143,1001.0,2.1630000000000003,5.343999999999999,2.143,10001.0,2.0,5.312,1.983,10001.0,1.3090000000000002,4.064,1.279,10001.0,1.309,4.064,1.279,10001.0,1.68,30.079,1.088,10001.0,3.053000,3.552000,3.007000
output_gpu,reduction_post_fusion,Fused_Sub_Mul_ReduceSum_split_6305333425251864818_kernel,1001.0,6.114,10.304,6.047999999999999,1001.0,6.114,10.304,6.047999999999999,10001.0,4.752,8.895999999999997,4.638999999999999,10001.0,4.758,8.64,4.638999999999999,10001.0,4.764,8.832,4.639,10001.0,4.77,6.271,4.64,10001.0,7.006000,8.032000,6.879000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_Cast_Mul_split_7086872249227058102_kernel,1001.0,109.83,113.822,109.534,1001.0,109.83,113.822,109.534,10001.0,13.417,20.35100000000001,12.544,10001.0,16.715999999999998,22.784,15.231,10001.0,16.796,22.592,16.639,10001.0,14.637,587.094,14.4,10001.0,19.187000,22.144000,17.344000
output_gpu,reduction_post_fusion,Fused_Mul_Add_ReduceMax_split_15776480582319130176_kernel,1001.0,113.057,120.894,111.966,1001.0,113.057,120.894,111.966,10001.0,103.654,121.47,100.574,10001.0,103.011,119.997,100.35,10001.0,104.34,121.182,100.35,10001.0,102.253,107.23,100.35,10001.0,110.513,116.863,108.895000
output_gpu,reduction_post_fusion,Fused_Add_Cast_ReduceMax_split_10604339311770712357_kernel,1001.0,4.619,7.584,4.575,1001.0,4.619,7.584,4.575,10001.0,2.902,6.496,2.847,10001.0,2.901,6.24,2.847,10001.0,2.901,6.24,2.847,10001.0,2.904,6.304,2.847,10001.0,4.523000,5.568000,4.447000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13494502410617732052_kernel,1001.0,6.526,9.28,6.495,1001.0,6.526,9.28,6.495,10001.0,1.861,5.216,1.823,10001.0,1.993,4.896,1.951,10001.0,1.699,4.96,1.663,10001.0,1.7,5.216,1.663,10001.0,3.468000,4.192000,3.391000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17934742783128702093_kernel,1001.0,2614.261,3013.85,2589.631,1001.0,2614.261,3013.85,2589.631,10001.0,3.871,8.064,3.807,10001.0,3.871,8.095,3.808,10001.0,3.873,8.543999999999999,3.808,10001.0,3.882,5.472,3.808,10001.0,5.509000,6.848000,5.408000
output_gpu,reduction_post_fusion,Fused_Reshape_ReduceSum_split_6969894200064021905_kernel,1001.0,66.08,71.679,64.79899999999999,1001.0,66.08,71.679,64.79899999999999,10001.0,16.583000000000002,24.8,16.095,10001.0,17.070999999999998,26.431,16.447,10001.0,16.516,25.823,16.031,10001.0,16.294,22.528,15.84,10001.0,18.528000,24.384,18.015000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_4564102481086974292_kernel,1001.0,3.08,6.207999999999999,3.039,1001.0,3.08,6.207999999999999,3.039,10001.0,2.375,6.08,2.367,10001.0,1.415,4.288,1.376,10001.0,1.575,4.352,1.535,10001.0,1.356,9.44,1.311,10001.0,3.296000,3.840000,3.232000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_12816099502507023364_kernel,1001.0,2.766,5.824,2.72,1001.0,2.766,5.824,2.72,10001.0,2.459,6.624,2.431,10001.0,2.458,6.176,2.431,10001.0,2.459,6.336,2.431,10001.0,2.461,6.24,2.431,10001.0,4.515000,5.632000,4.447000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_17146440480583811922_kernel,1001.0,7.67,10.432,7.551,1001.0,7.67,10.432,7.551,10001.0,3.1260000000000003,7.04,3.071,10001.0,3.125,6.72,3.071,10001.0,3.13,6.72,3.072,10001.0,2.682,3.712,2.655,10001.0,4.809000,5.952000,4.735000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15255374000456318533_kernel,1001.0,2.77,5.664,2.7510000000000003,1001.0,2.77,5.664,2.7510000000000003,10001.0,1.806,4.832,1.791,10001.0,1.528,4.48,1.503,10001.0,1.528,4.288,1.503,10001.0,1.529,1.888,1.503,10001.0,3.265000,3.936000,3.199000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_13063284910209712600_kernel,1001.0,73.719,85.34299999999999,72.447,1001.0,73.719,85.34299999999999,72.447,10001.0,70.05199999999999,82.11,69.567,10001.0,70.16699999999999,82.46199999999997,69.567,10001.0,70.123,81.31099999999998,69.599,10001.0,70.13,82.33500000000001,69.566,10001.0,72.434,83.135,71.935000
output_gpu,reduction_post_fusion,Fused_ReduceMean_728894389712972095_kernel,1001.0,41.789,52.736,41.279,1001.0,41.789,52.736,41.279,10001.0,42.228,54.399,40.928,10001.0,42.146,55.039,40.959,10001.0,42.129,52.927,40.959,10001.0,41.293000000000006,49.855,40.895,10001.0,44.267,53.983999999999995,43.712000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_652067717987047060_kernel,1001.0,3.88,7.167999999999999,3.839,1001.0,3.88,7.167999999999999,3.839,10001.0,2.572,5.632000000000001,2.559,10001.0,1.987,4.863,1.951,10001.0,1.986,4.768,1.951,10001.0,1.714,2.08,1.695,10001.0,3.723000,4.256000,3.648000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_9244583308022586472_kernel,1001.0,13.989,15.936,13.887,1001.0,13.989,15.936,13.887,10001.0,2.443,6.464,2.399,10001.0,2.442,6.047000000000001,2.399,10001.0,2.442,6.048,2.399,10001.0,2.089,2.88,2.047,10001.0,4.156000,4.992000,4.064000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_14289495857752519911_kernel,1001.0,51.048,53.951,51.007,1001.0,51.048,53.951,51.007,10001.0,5.674,9.216,5.119,10001.0,13.726,17.535999999999998,13.472,10001.0,4.848,9.375,4.736,10001.0,4.85,9.503,4.736,10001.0,6.793000,9.760000,6.432000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Cast_Mul_ReduceSum_split_12144552849236552072_kernel,1001.0,134.72,143.998,133.054,1001.0,134.72,143.998,133.054,10001.0,132.892,144.061,130.461,10001.0,132.86,141.149,130.654,10001.0,132.779,144.317,130.398,10001.0,132.695,1029.39,130.622,10001.0,135.731,142.431,133.695000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_7498848608164351722_kernel,1001.0,237.195,248.349,235.069,1001.0,237.195,248.349,235.069,10001.0,232.437,244.828,231.835,10001.0,232.439,244.379,231.771,10001.0,232.436,244.827,231.835,10001.0,232.544,981.295,231.835,10001.0,234.693,244.862,233.694000
output_gpu,reduction_post_fusion,Fused_ReduceMean_7448262306286874009_kernel,1001.0,163.526,170.84599999999998,152.35,1001.0,163.526,170.84599999999998,152.35,10001.0,35.74299999999999,47.359,32.896,10001.0,29.871,40.351,28.543000000000003,10001.0,29.706,40.704,28.512,10001.0,28.973,36.703,28.383,10001.0,31.951,40.192,31.360000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_16448887507098774487_kernel,1001.0,65.95100000000001,71.391,64.79899999999999,1001.0,65.95100000000001,71.391,64.79899999999999,10001.0,16.586,24.703000000000003,16.159000000000002,10001.0,17.102,26.752,16.8,10001.0,16.518,24.927,15.999,10001.0,16.339,22.271,15.839,10001.0,18.527000,24.319,17.984000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_588956880411653459_kernel,1001.0,3.749,5.92,3.68,1001.0,3.749,5.92,3.68,10001.0,2.156,5.5360000000000005,2.112,10001.0,1.846,2.464,1.823,10001.0,2.155,5.28,2.112,10001.0,1.846,2.464,1.823,10001.0,3.847000,5.440000,3.775000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_split_15937433446338712464_kernel,1001.0,7.572,10.4,7.487999999999999,1001.0,7.572,10.4,7.487999999999999,10001.0,3.088,7.04,3.039,10001.0,3.09,6.848,3.039,10001.0,3.091,6.783,3.039,10001.0,2.647,3.552,2.591,10001.0,4.741000,5.792000,4.703000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_ReduceSum_Log_split_11941850547297653662_kernel,1001.0,351.311,385.724,329.88300000000004,1001.0,351.311,385.724,329.88300000000004,10001.0,5.77,9.312,5.472,10001.0,43.749,52.447,42.239,10001.0,5.695,8.928,5.375,10001.0,5.163,5.759,4.927,10001.0,7.209000,8.063000,6.944000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_16423130798082889369_kernel,1001.0,2.521,4.896,2.495,1001.0,2.521,4.896,2.495,10001.0,2.155,5.728,2.143,10001.0,2.155,5.343999999999999,2.143,10001.0,2.155,5.344,2.143,10001.0,1.851,9.376,1.823,10001.0,3.836000,4.768000,3.775000
output_gpu,reduction_post_fusion,Fused_Reshape_ReduceSum_split_601052953231422684_kernel,1001.0,65.895,71.391,64.831,1001.0,65.895,71.391,64.831,10001.0,16.549999999999994,24.543000000000003,16.031,10001.0,17.033,26.559,16.416,10001.0,16.492,24.864,15.872,10001.0,16.363,22.336,15.903,10001.0,18.493000,24.192,18.016000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_15940470878870481376_kernel,1001.0,4.569,7.648,4.48,1001.0,4.569,7.648,4.48,10001.0,13.864,17.727,12.672,10001.0,4.317,7.2,4.287,10001.0,4.317,7.232,4.287,10001.0,4.318,8.576,4.287,10001.0,5.849000,6.400000,5.791000
output_gpu,reduction_post_fusion,Fused_ReduceMean_10360620627367660437_kernel,1001.0,233.898,250.045,221.981,1001.0,233.898,250.045,221.981,10001.0,61.104,90.558,53.663,10001.0,42.065,53.087,40.287,10001.0,41.696000000000005,52.959,40.159,10001.0,41.375,767.955,40.192,10001.0,44.153,51.903999999999996,43.552000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_8895374587018981820_kernel,1001.0,233.604,252.509,223.261,1001.0,233.604,252.509,223.261,10001.0,304.106,351.35400000000004,218.492,10001.0,84.803,102.206,81.918,10001.0,86.22399999999998,102.943,81.918,10001.0,86.615,103.102,82.43,10001.0,90.71300000000001,98.08,89.023000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15820032083568895392_kernel,1001.0,1.224,3.904,1.1840000000000002,1001.0,1.224,3.904,1.1840000000000002,10001.0,1.396,3.295,1.375,10001.0,1.43,4.032,1.4069999999999998,10001.0,1.395,3.328,1.375,10001.0,1.263,1.632,1.247,10001.0,3.059000,3.968000,3.007000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_11140957748696835535_kernel,1001.0,8.232000000000001,11.071,8.191,1001.0,8.232000000000001,11.071,8.191,10001.0,3.197,6.271,3.167,10001.0,2.519,5.76,2.495,10001.0,2.519,5.695,2.495,10001.0,2.524,5.76,2.495,10001.0,4.646000,5.280000,4.576000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_1218863806052220879_kernel,1001.0,31.72,41.312,30.591,1001.0,31.72,41.312,30.591,10001.0,25.385,37.087,24.863000000000003,10001.0,25.334,37.279,24.863000000000003,10001.0,25.335,37.952,24.831,10001.0,25.407,37.952,24.896,10001.0,27.472,37.664,27.007000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_8843945041846172220_kernel,1001.0,22.209,33.248000000000005,21.695,1001.0,22.209,33.248000000000005,21.695,10001.0,21.783,33.279999999999994,21.216,10001.0,21.791,33.664,21.375,10001.0,21.688,33.759,21.215,10001.0,21.687,30.495,21.215,10001.0,23.730000,33.183,23.295000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_11470169223351352124_kernel,1001.0,30.285,41.184,29.6,1001.0,30.285,41.184,29.6,10001.0,29.339,40.415,28.863000000000003,10001.0,29.336,40.48,28.864,10001.0,29.315,40.159,28.768,10001.0,29.284,37.951,28.735,10001.0,31.523000000000003,40.799,30.880000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_11793891151941006127_kernel,1001.0,1.408,3.552,1.375,1001.0,1.408,3.552,1.375,10001.0,1.406,3.488,1.375,10001.0,1.406,3.488,1.375,10001.0,1.406,3.488,1.375,10001.0,1.407,7.936,1.375,10001.0,3.272000,4.000000,3.199000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_6394141165707482428_kernel,1001.0,18.059,29.887,17.727,1001.0,18.059,29.887,17.727,10001.0,17.28,28.16,16.703,10001.0,17.194000000000006,27.872,16.639,10001.0,17.291,27.52,16.927,10001.0,17.177999999999997,26.112,16.608,10001.0,19.723000000000003,29.055999999999997,18.368000
output_gpu,reduction_post_fusion,Fused_Sub_Exp_Mul_Sub_ReduceSum_Mul_split_13869946147053202321_kernel,1001.0,8.061,10.72,8.031,1001.0,8.061,10.72,8.031,10001.0,8.356,10.24,8.318999999999999,10001.0,8.351,10.24,8.318999999999999,10001.0,8.354,10.208,8.319,10001.0,8.351,8.704,8.288,10001.0,9.691000,10.271000,9.631000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_111860644479318828_kernel,1001.0,8.056000000000001,11.168,7.999,1001.0,8.056000000000001,11.168,7.999,10001.0,2.655,6.08,2.591,10001.0,2.656,5.728,2.591,10001.0,2.658,5.792,2.591,10001.0,2.659,3.296,2.591,10001.0,4.326000,4.992000,4.255000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15671849393298523079_kernel,1001.0,3.326,6.367999999999999,3.295,1001.0,3.326,6.367999999999999,3.295,10001.0,1.791,9.024,1.759,10001.0,1.5219999999999998,4.288,1.503,10001.0,1.522,4.416,1.503,10001.0,1.377,1.696,1.343,10001.0,3.282000,3.840000,3.231000
output_gpu,reduction_post_fusion,Fused_Reshape_ReduceSum_split_6943325950722558809_kernel,1001.0,89.14,94.111,83.615,1001.0,89.14,94.111,83.615,10001.0,21.146,28.512,20.575,10001.0,21.604000000000006,32.254999999999995,21.087,10001.0,21.349,31.647,20.928,10001.0,21.096,28.703,20.735,10001.0,23.092000,31.615,22.528000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_10184234684540523326_kernel,1001.0,6.609,9.055,6.591,1001.0,6.609,9.055,6.591,10001.0,2.4930000000000003,4.831,2.463,10001.0,2.492,4.864,2.463,10001.0,2.492,4.864,2.463,10001.0,2.142,9.343,2.111,10001.0,4.127000,5.152000,4.063000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_12771490750012190459_kernel,1001.0,8.693999999999999,11.008,8.607999999999999,1001.0,8.693999999999999,11.008,8.607999999999999,10001.0,2.722,6.144,2.687,10001.0,2.721,5.92,2.687,10001.0,2.721,5.92,2.687,10001.0,2.333,3.072,2.303,10001.0,4.756000,5.760000,4.671000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_8859691318446054333_kernel,1001.0,2.287,5.471,2.271,1001.0,2.287,5.471,2.271,10001.0,2.075,5.184,2.047,10001.0,1.3630000000000002,4.16,1.3430000000000002,10001.0,1.362,4.128,1.343,10001.0,1.361,1.728,1.343,10001.0,3.092000,3.551000,3.039000
output_gpu,reduction_post_fusion,Fused_Mul_Sub_Mul_ReduceSum_Mul_Mul_Mul_split_5043065819975456671_kernel,1001.0,28.018,30.848000000000003,25.696,1001.0,28.018,30.848000000000003,25.696,10001.0,23.561,28.832,22.528,10001.0,23.383000000000003,29.216,22.4,10001.0,23.385,28.543000000000003,22.527,10001.0,23.259,26.783,22.367,10001.0,25.755000,29.535000,24.832000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_17670503119928074267_kernel,1001.0,366.086,385.307,348.219,1001.0,366.086,385.307,348.219,10001.0,4.312,7.52,4.287,10001.0,45.06100000000001,51.711000000000006,42.111,10001.0,2.927,5.984,2.879,10001.0,2.519,3.04,2.495,10001.0,4.657000,5.280000,4.576000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_4345341268849637758_kernel,1001.0,100.129,102.814,99.902,1001.0,100.129,102.814,99.902,10001.0,13.678,19.808,12.671,10001.0,25.937,32.70399999999999,24.127,10001.0,12.319,20.224,11.584,10001.0,11.772,16.799000000000003,11.008,10001.0,14.965000,19.456,14.208000
output_gpu,reduction_post_fusion,Fused_ReduceMean_6603234438879493274_kernel,1001.0,144.005,152.799,143.326,1001.0,144.005,152.799,143.326,10001.0,127.408,154.045,123.645,10001.0,126.669,954.829,123.677,10001.0,127.522,152.66899999999998,123.677,10001.0,124.363,130.846,123.614,10001.0,139.5,146.751,138.943000
output_gpu,reduction_post_fusion,Fused_ReduceMean_10278137507852802939_kernel,1001.0,53.5,61.855,52.895,1001.0,53.5,61.855,52.895,10001.0,49.47,61.855,45.631,10001.0,48.633,62.143,45.631,10001.0,48.885,61.823,45.631,10001.0,46.303,52.927,45.631,10001.0,53.263,58.687000000000005,52.703000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_14718931859860011190_kernel,1001.0,70.803,82.815,68.415,1001.0,70.803,82.815,68.415,10001.0,66.221,77.34299999999998,65.79,10001.0,66.277,76.99000000000002,65.79,10001.0,66.26400000000001,77.694,65.791,10001.0,66.208,793.65,65.79,10001.0,68.363,77.215,67.935000
output_gpu,reduction_post_fusion,Fused_Reshape_Reshape_Cast_ReduceSum_split_10356419823469240956_kernel,1001.0,1.639,4.448,1.599,1001.0,1.639,4.448,1.599,10001.0,1.692,3.616,1.663,10001.0,1.692,3.551,1.663,10001.0,1.691,3.583,1.663,10001.0,1.691,3.584,1.663,10001.0,3.294000,3.840000,3.231000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_6680071890310244923_kernel,1001.0,231.191,247.293,218.109,1001.0,231.191,247.293,218.109,10001.0,322.117,355.802,237.435,10001.0,84.15299999999999,100.126,80.639,10001.0,83.87100000000001,99.487,80.35,10001.0,81.655,730.931,80.286,10001.0,89.347,97.18400000000001,87.487000
output_gpu,reduction_post_fusion,Fused_Sub_Mul_ReduceSum_Exp_split_4971327750864844560_kernel,1001.0,350.272,386.299,330.492,1001.0,350.272,386.299,330.492,10001.0,9.876,15.2,9.279,10001.0,45.873000000000005,52.767,42.815,10001.0,10.096,14.751,9.535,10001.0,9.799,12.032,9.184,10001.0,11.959000,14.496000,11.039000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_5124766514423059357_kernel,1001.0,50.011,60.063,48.831,1001.0,50.011,60.063,48.831,10001.0,47.63,60.351000000000006,47.072,10001.0,47.599,60.415000000000006,47.103,10001.0,47.614,59.743,47.071,10001.0,47.48,57.823,47.071,10001.0,49.86,60.863,49.439000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_6109414240503509605_kernel,1001.0,3.865,5.984,3.839,1001.0,3.865,5.984,3.839,10001.0,3.863,6.047999999999999,3.839,10001.0,3.862,6.047999999999999,3.839,10001.0,3.862,6.047,3.839,10001.0,3.863,6.08,3.839,10001.0,5.720000,6.784000,5.663000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_9498261738966874619_kernel,1001.0,79.738,90.719,79.10300000000001,1001.0,79.738,90.719,79.10300000000001,10001.0,79.381,91.39,78.718,10001.0,79.38499999999998,91.774,78.718,10001.0,79.37700000000002,91.455,78.686,10001.0,79.519,1059.245,78.654,10001.0,81.35100000000001,92.44800000000001,80.191000
output_gpu,reduction_post_fusion,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_7566706435819628490_kernel,1001.0,18.55,27.935,17.984,1001.0,18.55,27.935,17.984,10001.0,24.155,32.543,21.888,10001.0,22.921,32.223,21.151,10001.0,23.678,32.832,21.792,10001.0,22.01,29.472,20.895,10001.0,25.245000,32.510999999999996,23.872000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_16948702104742076258_kernel,1001.0,5.426,8.96,5.312,1001.0,5.426,8.96,5.312,10001.0,6.481,12.063,6.335,10001.0,6.486000000000001,11.871,6.336,10001.0,6.487,11.071,6.335,10001.0,5.566,8.319999999999999,5.439,10001.0,8.174000,10.656000,7.840000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_Mul_ReduceSum_split_592974943113998615_kernel,1001.0,126.856,132.511,125.535,1001.0,126.856,132.511,125.535,10001.0,127.394,133.438,125.821,10001.0,127.312,135.134,125.309,10001.0,127.34300000000002,132.92600000000002,125.469,10001.0,127.585,860.0799999999999,125.501,10001.0,128.932,136.926,127.423000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_8179379257928922146_kernel,1001.0,7.85,9.792,7.776,1001.0,7.85,9.792,7.776,10001.0,2.267,6.112,2.239,10001.0,2.266,5.791,2.239,10001.0,2.266,5.856,2.239,10001.0,2.267,6.24,2.239,10001.0,3.957000,4.896000,3.872000
output_gpu,reduction_post_fusion,Fused_Mul_ReduceSum_split_10063155855610842188_kernel,1001.0,20.172,22.239,19.903,1001.0,20.172,22.239,19.903,10001.0,2.622,6.622999999999999,2.559,10001.0,2.624,6.176,2.559,10001.0,2.624,6.144,2.559,10001.0,2.624,3.424,2.56,10001.0,4.678000,5.632000,4.607000
output_gpu,reduction_post_fusion,Fused_Cast_ReduceSum_split_11134459247345513023_kernel,1001.0,3.093,4.96,3.071,1001.0,3.093,4.96,3.071,10001.0,3.092,5.152,3.071,10001.0,3.091,5.024,3.071,10001.0,3.091,5.024,3.071,10001.0,3.091,5.024,3.071,10001.0,4.652000,5.312000,4.607000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_1722693735800738113_kernel,1001.0,3.452,6.624,3.392,1001.0,3.452,6.624,3.392,10001.0,13.815,16.864,12.671,10001.0,4.243,7.136,4.223,10001.0,4.243,6.975,4.223,10001.0,4.242,4.608,4.223,10001.0,5.861000,6.368000,5.823000
output_gpu,reduction_post_fusion,Fused_ReduceMax_split_6945804505483871554_kernel,1001.0,2.562,5.856,2.527,1001.0,2.562,5.856,2.527,10001.0,2.494,6.4,2.463,10001.0,2.494,6.144,2.463,10001.0,2.494,6.271,2.463,10001.0,2.145,5.568,2.111,10001.0,4.215000,5.280000,4.159000
output_gpu,reduction_post_fusion,Fused_ReduceSum_Mul_Mul_Mul_Mul_RealDiv_Sub_Mul_Mul_Mul_RealDiv_Sub_Mul_split_18399083569230280291_kernel,1001.0,7.028,9.984,7.007000000000001,1001.0,7.028,9.984,7.007000000000001,10001.0,2.403,6.207999999999999,2.367,10001.0,2.516,5.888,2.495,10001.0,2.228,5.952,2.207,10001.0,1.912,3.296,1.887,10001.0,3.875000,4.864000,3.807000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_2429800524451781048_kernel,1001.0,2276.425,2609.823,2243.7470000000003,1001.0,2276.425,2609.823,2243.7470000000003,10001.0,2.6830000000000003,6.336,2.655,10001.0,2.685,6.08,2.655,10001.0,2.685,6.016,2.655,10001.0,2.687,6.368,2.655,10001.0,4.744000,5.823000,4.671000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_11188387990252538041_kernel,1001.0,8.107000000000001,11.072,8.062999999999999,1001.0,8.107000000000001,11.072,8.062999999999999,10001.0,3.156,6.656000000000001,3.103,10001.0,3.159,6.528,3.103,10001.0,3.159,6.4,3.103,10001.0,3.161,3.968,3.103,10001.0,5.218000,6.305000,5.120000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_18046230714060098124_kernel,1001.0,2.149,4.384,2.111,1001.0,2.149,4.384,2.111,10001.0,2.147,4.416,2.111,10001.0,2.146,4.448,2.111,10001.0,2.146,4.48,2.111,10001.0,2.146,2.912,2.111,10001.0,3.966000,5.216000,3.903000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_15100445699409848623_kernel,1001.0,247.492,257.181,231.069,1001.0,247.492,257.181,231.069,10001.0,2.698,6.144,2.655,10001.0,2.697,5.888,2.655,10001.0,2.699,5.888,2.655,10001.0,2.698,3.455,2.655,10001.0,4.382000,5.248000,4.319000
output_gpu,reduction_post_fusion,Fused_Mul_Add_ReduceMax_split_14375517492105268498_kernel,1001.0,86.236,94.558,85.119,1001.0,86.236,94.558,85.119,10001.0,79.258,94.654,76.575,10001.0,80.35000000000002,94.783,77.02199999999999,10001.0,80.863,93.438,76.67,10001.0,78.498,84.67,76.735,10001.0,84.827,91.199,83.167000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_16845748357062715286_kernel,1001.0,16.054000000000002,17.92,15.967,1001.0,16.054000000000002,17.92,15.967,10001.0,2.446,5.888,2.4,10001.0,2.447,5.632000000000001,2.4,10001.0,2.447,5.632,2.4,10001.0,2.447,3.232,2.4,10001.0,4.158000,5.120000,4.095000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_2681961934642545650_kernel,1001.0,10.945,13.664,10.816,1001.0,10.945,13.664,10.816,10001.0,14.787,18.079,14.272,10001.0,4.482,7.519,4.415,10001.0,4.482,7.552,4.415,10001.0,4.485,7.52,4.415,10001.0,6.109000,7.104000,5.983000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_Mul_split_9989202327243490401_kernel,1001.0,1.439,3.4560000000000004,1.4069999999999998,1001.0,1.439,3.4560000000000004,1.4069999999999998,10001.0,1.4369999999999998,3.488,1.4069999999999998,10001.0,1.4369999999999998,3.52,1.4069999999999998,10001.0,1.437,3.52,1.407,10001.0,1.437,8.512,1.407,10001.0,3.161000,3.872000,3.103000
output_gpu,reduction_post_fusion,Fused_Mul_Mul_ReduceSum_split_12912535945341581353_kernel,1001.0,8.062999999999999,10.816,7.936,1001.0,8.062999999999999,10.816,7.936,10001.0,13.921,17.951,12.48,10001.0,4.331,7.327999999999999,4.255,10001.0,4.331,7.584,4.255,10001.0,4.333,7.903,4.255,10001.0,6.476000,7.136000,6.367000
output_gpu,reduction_post_fusion,Fused_ReduceSum_split_2608457562694044771_kernel,1001.0,4.82,6.816,4.736000000000001,1001.0,4.82,6.816,4.736000000000001,10001.0,2.172,5.568,2.143,10001.0,2.171,5.312,2.143,10001.0,2.171,5.312,2.143,10001.0,2.171,2.848,2.143,10001.0,4.164000,5.120000,4.095000
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_17097736276332567784_kernel,1001.0,241.82,261.661,233.117,1001.0,248.478,258.781,233.789,10001.0,225.283,260.795,220.251,10001.0,225.283,260.795,220.251,10001.0,224.51,260.85900000000004,220.156,10001.0,152.165,152.92600000000002,151.93400000000003,10001.0,172.275,188.12599999999998,169.69500000000002
output_gpu,yolov3_darknet,Fused_AddN_Cast_Transpose_fusion_2346913762335189478_kernel,1001.0,14182.317,14399.976,13971.758,1001.0,9427.461,10403.002,8430.835,10001.0,9446.752,10471.641,8548.286,10001.0,9446.752,10471.641,8548.286,10001.0,9413.304,10268.329,8570.629,10001.0,685.415,1418.953,675.3169999999999,10001.0,679.061,688.634,672.377
output_gpu,yolov3_darknet,Fused_Add_split_5261878416554002092_kernel,1001.0,90.727,96.606,89.95100000000001,1001.0,88.37299999999999,91.455,87.679,10001.0,89.098,96.51,87.48599999999998,10001.0,89.098,96.51,87.48599999999998,10001.0,88.87,96.159,87.48700000000001,10001.0,95.063,801.716,92.415,10001.0,93.57000000000001,97.151,92.384
output_gpu,yolov3_darknet,Fused_Cast_fusion_3210942441325238138_kernel,1001.0,1.422,4.512,1.4069999999999998,1001.0,1.42,1.7919999999999998,1.4069999999999998,10001.0,1.419,4.192,1.376,10001.0,1.419,4.192,1.376,10001.0,1.423,9.44,1.376,10001.0,1.422,1.92,1.376,10001.0,3.445000,4.033000,3.360000
output_gpu,yolov3_darknet,Fused_Transpose_split_2070372294367398278_kernel,1001.0,87.56200000000001,92.127,86.559,1001.0,89.316,93.727,88.479,10001.0,82.234,93.022,77.982,10001.0,82.234,93.022,77.982,10001.0,81.708,92.574,77.95,10001.0,48.149,51.776,46.335,10001.0,52.777,56.928,50.975
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_12281391964795282874_kernel,1001.0,616.161,626.776,613.048,1001.0,185.857,197.854,177.054,10001.0,160.96099999999996,181.405,156.477,10001.0,160.96099999999996,181.405,156.477,10001.0,159.86800000000005,181.02,156.253,10001.0,157.86,875.987,154.909,10001.0,169.076,172.83100000000002,166.782
output_gpu,yolov3_darknet,Fused_Transpose_fusion_4135085735100155165_kernel,1001.0,302.562,310.108,299.132,1001.0,96.875,739.543,92.575,10001.0,90.437,103.006,86.846,10001.0,90.437,103.006,86.846,10001.0,90.882,101.822,87.071,10001.0,86.05799999999999,99.231,83.327,10001.0,92.737,96.447,91.10300000000001
output_gpu,yolov3_darknet,Fused_Cast_Transpose_split_1668508625948198013_kernel,1001.0,1.431,4.16,1.4069999999999998,1001.0,1.436,1.7919999999999998,1.4069999999999998,10001.0,1.436,4.384,1.4069999999999998,10001.0,1.436,4.384,1.4069999999999998,10001.0,1.436,4.448,1.407,10001.0,1.304,1.632,1.279,10001.0,3.446000,3.968000,3.360000
output_gpu,yolov3_darknet,Fused_Cast_Transpose_split_10434588479252844865_kernel,1001.0,3.099,6.144,2.975,1001.0,3.1210000000000004,3.52,3.072,10001.0,3.11,6.143,3.071,10001.0,3.11,6.143,3.071,10001.0,3.109,6.144,3.071,10001.0,2.314,2.816,2.271,10001.0,4.621000,5.280000,4.543000
output_gpu,yolov3_darknet,Fused_Cast_Transpose_split_3528709108505838205_kernel,1001.0,2.039,5.056,1.984,1001.0,2.101,2.719,2.079,10001.0,2.097,5.088,2.047,10001.0,2.097,5.088,2.047,10001.0,2.101,9.407,2.047,10001.0,1.685,2.016,1.632,10001.0,3.704000,4.256000,3.647000
output_gpu,yolov3_darknet,Fused_Cast_fusion_2438007131923709233_kernel,1001.0,1.258,4.0,1.247,1001.0,1.255,1.632,1.247,10001.0,1.258,9.088,1.247,10001.0,1.258,9.088,1.247,10001.0,1.256,4.064,1.216,10001.0,1.256,1.664,1.247,10001.0,3.237000,3.808000,3.167000
output_gpu,yolov3_darknet,Fused_Cast_fusion_3135455775131459617_kernel,1001.0,2.1180000000000003,5.056,2.079,1001.0,2.117,2.72,2.079,10001.0,2.115,5.024,2.079,10001.0,2.115,5.024,2.079,10001.0,2.116,5.056,2.079,10001.0,2.12,2.592,2.079,10001.0,4.187000,4.897000,4.127000
output_gpu,yolov3_darknet,Fused_Transpose_split_18041598062773898709_kernel,1001.0,22.253,25.375,21.824,1001.0,20.19,21.087,19.903,10001.0,21.146,25.471,19.775,10001.0,21.146,25.471,19.775,10001.0,21.801,25.311,21.152,10001.0,10.419,11.072,10.176,10001.0,13.618000,14.560000,13.376000
output_gpu,yolov3_darknet,Fused_Add_fusion_6198467796363076803_kernel,1001.0,45.455,50.911,44.927,1001.0,44.667,48.415,44.191,10001.0,44.777,51.327,43.711000000000006,10001.0,44.777,51.327,43.711000000000006,10001.0,44.739,50.847,43.68,10001.0,48.182,51.648,45.983,10001.0,47.253,51.103,46.431
output_gpu,yolov3_darknet,Fused_Transpose_fusion_11894930320330768025_kernel,1001.0,125.817,134.238,123.294,1001.0,103.579,767.607,102.303,10001.0,107.353,123.838,102.302,10001.0,107.353,123.838,102.302,10001.0,105.441,123.454,102.238,10001.0,55.279,57.087,54.271,10001.0,62.165,64.0,61.312
output_gpu,yolov3_darknet,Fused_Maximum_Mul_Sub_Abs_Neg_Exp_Add_Log_Add_Mul_Sub_Mul_Less_Cast_Reshape_Mul__more_split_13877461448529976323_kernel,1001.0,1.866,5.024,1.824,1001.0,1.611,2.4,1.568,10001.0,1.875,5.28,1.824,10001.0,1.875,5.28,1.824,10001.0,1.875,5.184,1.824,10001.0,1.608,2.496,1.567,10001.0,3.626000,4.352000,3.552000
output_gpu,yolov3_darknet,Fused_Mul_Add_Mul_Add_Minimum_Sub_Sub_Maximum_Sub_Maximum_split_11405037783730064407_kernel,1001.0,285.572,296.252,272.028,1001.0,219.421,228.158,212.477,10001.0,209.981,229.531,206.46,10001.0,209.981,229.531,206.46,10001.0,209.607,229.372,206.588,10001.0,210.057,230.3,206.365,10001.0,223.99099999999999,225.662,222.42999999999998
output_gpu,yolov3_darknet,Fused_Transpose_split_5914675560236879135_kernel,1001.0,211.324,220.829,209.917,1001.0,61.779,63.552,60.639,10001.0,63.785,71.48599999999998,60.255,10001.0,63.785,71.48599999999998,60.255,10001.0,63.15,72.063,60.127,10001.0,50.63,747.764,48.383,10001.0,55.378,58.048,53.407999999999994
output_gpu,yolov3_darknet,Fused_Add_split_210485882159418272_kernel,1001.0,693.0110000000001,711.063,684.888,1001.0,688.909,1592.172,685.368,10001.0,688.0659999999998,711.475,685.4590000000002,10001.0,688.0659999999998,711.475,685.4590000000002,10001.0,687.9910000000001,712.179,677.268,10001.0,751.0949999999999,1478.313,744.852,10001.0,710.057,732.7620000000001,700.314
output_gpu,yolov3_darknet,Fused_Mul_Maximum_split_9996968564126295888_kernel,1001.0,12.477,17.824,12.224,1001.0,12.545,18.912,12.192,10001.0,12.43,18.176,11.871,10001.0,12.43,18.176,11.871,10001.0,12.487,17.984,12.192,10001.0,19.711,21.568,19.039,10001.0,14.465000,17.248000,14.112000
output_gpu,yolov3_darknet,Fused_Transpose_split_9031373974262086375_kernel,1001.0,677.9110000000001,684.9830000000001,671.768,1001.0,264.548,274.14,252.284,10001.0,247.664,277.402,243.195,10001.0,247.664,277.402,243.195,10001.0,248.381,278.331,243.515,10001.0,177.122,891.891,171.869,10001.0,188.831,193.822,185.79000000000002
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_12375248346866210197_kernel,1001.0,549.741,594.201,525.561,1001.0,460.299,511.002,435.195,10001.0,438.09,512.854,435.992,10001.0,438.09,512.854,435.992,10001.0,440.337,512.3430000000001,436.056,10001.0,605.831,699.061,602.5500000000001,10001.0,668.472,669.0500000000001,668.122
output_gpu,yolov3_darknet,Fused_Neg_Exp_Add_RealDiv_Sub_BroadcastTo_Mul_Mul_split_11214060943599653272_kernel,1001.0,24.014,29.247,23.424,1001.0,24.383000000000003,29.664,23.648000000000003,10001.0,24.284,31.775,23.008000000000003,10001.0,24.284,31.775,23.008000000000003,10001.0,24.078000000000003,31.52,23.072,10001.0,24.047,31.84,22.975,10001.0,26.243,31.551000000000002,25.408
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_3081019101040639257_kernel,1001.0,269.66700000000003,282.62,255.644,1001.0,224.22,225.086,223.549,10001.0,229.037,264.827,223.996,10001.0,229.037,264.827,223.996,10001.0,228.98,265.019,223.996,10001.0,302.302,302.651,302.075,10001.0,336.041,336.60499999999996,335.836
output_gpu,yolov3_darknet,Fused_Mul_Add_Mul_Add_Minimum_Sub_Sub_Maximum_Sub_Maximum_split_3215116932040408984_kernel,1001.0,66.792,70.911,65.824,1001.0,48.543,51.072,47.2,10001.0,50.569,57.407,47.263000000000005,10001.0,50.569,57.407,47.263000000000005,10001.0,49.869,57.695,47.199,10001.0,50.315,54.591,47.167,10001.0,56.613,61.216,54.111
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_11350827629827020249_kernel,1001.0,1057.284,1163.282,1024.659,1001.0,918.269,1568.781,887.189,10001.0,893.24,1043.02,889.199,10001.0,893.24,1043.02,889.199,10001.0,891.347,1042.285,887.0239999999999,10001.0,903.213,1534.953,902.034,10001.0,1003.029,1004.8539999999999,1001.7180000000001
output_gpu,yolov3_darknet,Fused_Add_fusion_210485882159418272_kernel,1001.0,692.19,711.063,685.783,1001.0,692.0219999999999,1372.622,685.943,10001.0,687.8710000000001,710.3549999999998,678.995,10001.0,687.8710000000001,710.3549999999998,678.995,10001.0,687.674,710.0039999999999,685.4590000000002,10001.0,751.032,1455.881,743.348,10001.0,709.5350000000001,715.3850000000001,706.905
output_gpu,yolov3_darknet,Fused_Transpose_split_8661562565459097272_kernel,1001.0,64.46300000000001,70.751,63.68,1001.0,52.521,55.039,52.159,10001.0,53.755,64.76700000000001,51.967,10001.0,53.755,64.76700000000001,51.967,10001.0,56.44600000000001,64.542,51.999,10001.0,31.509000000000004,725.173,29.44,10001.0,33.31,34.88,32.639
output_gpu,yolov3_darknet,Fused_Cast_fusion_4461640665720830342_kernel,1001.0,2.146,5.024,2.08,1001.0,2.145,2.72,2.08,10001.0,2.142,5.152,2.048,10001.0,2.142,5.152,2.048,10001.0,2.142,5.184,2.048,10001.0,2.869,3.392,2.688,10001.0,4.219000,4.864000,4.096000
output_gpu,yolov3_darknet,Fused_Add_fusion_17860235963285917190_kernel,1001.0,350.475,361.787,345.691,1001.0,347.27,352.956,345.276,10001.0,346.954,361.081,345.2100000000001,10001.0,346.954,361.081,345.2100000000001,10001.0,347.17800000000005,361.785,345.242,10001.0,375.8250000000001,380.538,358.907,10001.0,352.202,364.285,350.493
output_gpu,yolov3_darknet,Fused_Neg_Exp_Add_RealDiv_Sub_BroadcastTo_Mul_Mul_split_14339112780438751518_kernel,1001.0,352.39300000000003,359.259,346.94,1001.0,314.508,319.676,312.476,10001.0,313.9600000000001,320.0259999999999,311.898,10001.0,313.9600000000001,320.0259999999999,311.898,10001.0,313.947,320.922,311.67400000000004,10001.0,314.162,1034.065,311.867,10001.0,317.888,322.397,315.581
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_2731157963391380793_kernel,1001.0,236.503,248.573,231.901,1001.0,91.374,94.591,90.687,10001.0,95.3,110.238,90.462,10001.0,95.3,110.238,90.462,10001.0,92.718,110.206,90.75,10001.0,38.483,480.249,37.471,10001.0,43.250000,48.159000,42.240000
output_gpu,yolov3_darknet,Fused_Transpose_split_11952510180089398822_kernel,1001.0,253.224,269.532,232.285,1001.0,231.941,243.805,215.677,10001.0,206.679,240.955,203.1,10001.0,206.679,240.955,203.1,10001.0,207.338,241.436,203.132,10001.0,111.754,873.139,108.062,10001.0,121.294,123.903,119.839
output_gpu,yolov3_darknet,Fused_AddN_Cast_Transpose_fusion_3633051505143196223_kernel,1001.0,3098.2,3206.936,3004.89,1001.0,401.187,443.131,372.475,10001.0,377.689,441.592,372.281,10001.0,377.689,441.592,372.281,10001.0,377.992,442.776,372.346,10001.0,141.011,921.138,138.59,10001.0,143.627,150.302,141.343
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_10434588479252844865_kernel,1001.0,3.105,6.144,3.007,1001.0,3.133,6.303999999999999,3.072,10001.0,3.11,6.112,3.071,10001.0,3.11,6.112,3.071,10001.0,3.11,5.984,3.071,10001.0,2.549,5.728,2.496,10001.0,4.249000,4.896000,4.191000
output_gpu,yolov3_darknet,Fused_Maximum_Mul_Sub_Abs_Neg_Exp_Add_Log_Add_Mul_Sub_Mul_Less_Cast_Reshape_Mul__more_split_5148608022566047223_kernel,1001.0,2.48,6.047999999999999,2.432,1001.0,2.486,6.4,2.432,10001.0,2.4730000000000003,6.047999999999999,2.431,10001.0,2.4730000000000003,6.047999999999999,2.431,10001.0,2.474,6.048,2.431,10001.0,2.479,3.808,2.431,10001.0,4.221000,5.217000,4.159000
output_gpu,yolov3_darknet,Fused_AddN_Cast_Transpose_fusion_15880005747626556498_kernel,1001.0,147.623,166.91,140.415,1001.0,53.731,61.919,52.927,10001.0,47.951,59.32599999999999,43.679,10001.0,47.951,59.32599999999999,43.679,10001.0,46.374,59.871,43.615,10001.0,38.551,47.84,37.312,10001.0,40.861000,46.783000,39.616000
output_gpu,yolov3_darknet,Fused_Add_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_12065673734468492112_kernel,1001.0,250.645,261.34,244.669,1001.0,89.77799999999999,94.047,89.215,10001.0,92.907,109.182,88.99,10001.0,92.907,109.182,88.99,10001.0,93.042,109.662,88.89399999999999,10001.0,43.528,49.407,42.367,10001.0,47.638,53.856,46.623999999999995
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_1668508625948198013_kernel,1001.0,1.431,4.256,1.4069999999999998,1001.0,1.439,4.16,1.4069999999999998,10001.0,1.436,4.351,1.4069999999999998,10001.0,1.436,4.351,1.4069999999999998,10001.0,1.436,4.224,1.407,10001.0,1.439,4.672,1.407,10001.0,3.201000,4.063000,3.135000
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_4693619965623305281_kernel,1001.0,521.444,536.281,507.801,1001.0,101.88,108.063,101.151,10001.0,105.309,124.061,101.086,10001.0,105.309,124.061,101.086,10001.0,101.627,120.158,98.558,10001.0,73.741,86.59,71.327,10001.0,80.028,84.287,78.816
output_gpu,yolov3_darknet,Fused_Maximum_Mul_Sub_Abs_Neg_Exp_Add_Log_Add_Mul_Sub_Mul_Less_Cast_Reshape_Mul__more_split_3750181795376720472_kernel,1001.0,4.613,8.192,4.512,1001.0,3.929,5.28,3.84,10001.0,4.566,8.288,4.479,10001.0,4.566,8.288,4.479,10001.0,4.565,8.479,4.479,10001.0,3.918,5.376,3.808,10001.0,6.232000,7.104000,6.112000
output_gpu,yolov3_darknet,Fused_Cast_BiasAdd_Transpose_fusion_1829409357409896670_kernel,1001.0,443.444,495.866,420.891,1001.0,451.611,1082.131,412.506,10001.0,414.765,485.015,411.192,10001.0,414.765,485.015,411.192,10001.0,416.605,485.36800000000005,410.52,10001.0,172.57500000000002,887.378,170.077,10001.0,186.12400000000002,201.95,182.558
output_gpu,yolov3_darknet,Fused_Cast_BiasAdd_Transpose_fusion_785487230899995099_kernel,1001.0,123.238,131.87,122.462,1001.0,102.414,106.047,101.823,10001.0,105.953,123.23,101.694,10001.0,105.953,123.23,101.694,10001.0,105.212,123.262,101.63,10001.0,46.575,49.247,45.088,10001.0,52.602000000000004,55.199,51.392
output_gpu,yolov3_darknet,Fused_Add_fusion_12127065547842097563_kernel,1001.0,175.65,183.742,173.438,1001.0,177.739,183.93400000000003,176.574,10001.0,173.94299999999996,184.156,172.317,10001.0,173.94299999999996,184.156,172.317,10001.0,173.791,183.005,172.317,10001.0,188.966,874.3539999999999,184.638,10001.0,177.587,181.59900000000002,176.254
output_gpu,yolov3_darknet,Fused_Add_split_17860235963285917190_kernel,1001.0,353.445,362.491,347.996,1001.0,352.889,999.252,345.852,10001.0,346.834,361.017,345.145,10001.0,346.834,361.017,345.145,10001.0,347.136,361.85,345.274,10001.0,375.713,380.698,362.65,10001.0,351.74100000000004,355.773,350.397
output_gpu,yolov3_darknet,Fused_Transpose_split_4566969659230709402_kernel,1001.0,32.0,34.848,31.648000000000003,1001.0,18.252,21.44,17.952,10001.0,18.084,20.992,16.256,10001.0,18.084,20.992,16.256,10001.0,17.634,21.056,16.256,10001.0,12.589,15.488,12.32,10001.0,13.923000,14.816000,13.632000
output_gpu,yolov3_darknet,Fused_Mul_Add_Mul_Add_Minimum_Sub_Sub_Maximum_Sub_Maximum_split_6491866283294118894_kernel,1001.0,18.495,20.896,17.344,1001.0,16.714000000000002,22.015,15.968,10001.0,16.276,21.119,14.784,10001.0,16.276,21.119,14.784,10001.0,16.69,20.96,15.775999999999998,10001.0,15.048,17.663999999999998,14.239999999999998,10001.0,18.895,20.448,17.888
output_gpu,yolov3_darknet,Fused_Cast_split_17518194487704274734_kernel,1001.0,48.774,54.496,48.128,1001.0,48.285,51.648,47.583,10001.0,48.437,54.55900000000001,47.647,10001.0,48.437,54.55900000000001,47.647,10001.0,48.529,54.239,47.61600000000001,10001.0,44.736,49.183,43.743,10001.0,50.789,54.624,49.92
output_gpu,yolov3_darknet,Fused_Add_Add_Add_Mul_Add_Add_Add_Mul_Add_Add_Add_Add_Mul_Add_split_8253779166426137294_kernel,1001.0,1.507,3.616,1.471,1001.0,1.2919999999999998,1.7919999999999998,1.279,10001.0,1.4980000000000002,9.376,1.471,10001.0,1.4980000000000002,9.376,1.471,10001.0,1.497,3.52,1.471,10001.0,1.294,1.952,1.279,10001.0,3.116000,4.224000,3.071000
output_gpu,yolov3_darknet,Fused_Add_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_4655960534505059885_kernel,1001.0,536.13,556.4730000000001,525.241,1001.0,104.162,109.951,103.231,10001.0,107.699,125.374,103.23,10001.0,107.699,125.374,103.23,10001.0,106.782,125.63,103.199,10001.0,82.238,90.846,79.935,10001.0,87.012,91.679,85.663
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_1396235672263232369_kernel,1001.0,1335.583,1351.503,1312.111,1001.0,398.095,405.083,396.124,10001.0,402.978,461.975,396.504,10001.0,402.978,461.975,396.504,10001.0,396.275,455.672,390.809,10001.0,316.682,1085.904,310.459,10001.0,334.757,363.741,329.949
output_gpu,yolov3_darknet,Fused_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_1681448105203669100_kernel,1001.0,588.999,601.336,573.6569999999999,1001.0,46.65,51.167,46.239,10001.0,48.516000000000005,58.015,44.927,10001.0,48.516000000000005,58.015,44.927,10001.0,47.831,57.823,44.67100000000001,10001.0,44.806,50.496,44.031,10001.0,53.522000,61.376000,50.752000
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_16762074751097610432_kernel,1001.0,11.005,21.344,10.688,1001.0,14.145,15.935,13.696,10001.0,14.034,18.272,12.96,10001.0,14.034,18.272,12.96,10001.0,14.137,18.24,13.536,10001.0,10.591,12.576,10.048,10001.0,12.246000,14.784000,11.808000
output_gpu,yolov3_darknet,Fused_Transpose_fusion_14126140204634983704_kernel,1001.0,199.544,209.597,198.141,1001.0,57.163,60.543,56.607,10001.0,60.21,68.44600000000001,56.415,10001.0,60.21,68.44600000000001,56.415,10001.0,59.461000000000006,68.702,56.447,10001.0,47.295,54.559000000000005,44.671,10001.0,51.507,54.752,50.079
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_16189785156585381433_kernel,1001.0,1.329,4.096,1.311,1001.0,1.253,4.256,1.215,10001.0,1.251,4.032,1.215,10001.0,1.251,4.032,1.215,10001.0,1.252,4.0,1.215,10001.0,1.252,4.256,1.215,10001.0,2.983000,3.488000,2.911000
output_gpu,yolov3_darknet,Fused_Cast_fusion_1481848414456064298_kernel,1001.0,2.1180000000000003,4.992,2.079,1001.0,2.116,2.624,2.079,10001.0,2.114,5.055,2.079,10001.0,2.114,5.055,2.079,10001.0,2.115,5.024,2.079,10001.0,1.822,2.272,1.791,10001.0,3.912000,4.640000,3.839000
output_gpu,yolov3_darknet,Fused_Cast_split_14777600939563056334_kernel,1001.0,13.13,19.84,12.544,1001.0,12.970999999999998,17.215,12.384,10001.0,13.140999999999998,19.68,12.512,10001.0,13.140999999999998,19.68,12.512,10001.0,13.136,19.36,12.448,10001.0,11.227,16.864,10.528,10001.0,15.243,19.616000000000003,14.4
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_18299046337899818110_kernel,1001.0,1743.305,1773.802,1708.4589999999998,1001.0,394.328,1252.432,361.628,10001.0,362.769,423.704,358.777,10001.0,362.769,423.704,358.777,10001.0,362.5,424.28,359.098,10001.0,146.677,850.099,143.39,10001.0,156.018,160.57399999999998,154.463
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_14551479352420402373_kernel,1001.0,594.839,634.808,566.809,1001.0,459.147,514.266,435.259,10001.0,440.637,512.758,436.056,10001.0,440.637,512.758,436.056,10001.0,440.971,512.5020000000002,435.896,10001.0,603.05,1197.134,602.5509999999999,10001.0,668.2719999999999,668.89,668.025
output_gpu,yolov3_darknet,Fused_Transpose_split_11894930320330768025_kernel,1001.0,127.078,133.502,125.887,1001.0,120.342,123.871,119.678,10001.0,104.119,123.261,102.302,10001.0,104.119,123.261,102.302,10001.0,105.253,123.422,102.334,10001.0,57.892,66.303,54.143,10001.0,62.16,64.832,61.343
output_gpu,yolov3_darknet,Fused_Transpose_split_4925058842912938559_kernel,1001.0,468.325,515.545,445.21,1001.0,449.476,480.89,410.651,10001.0,413.079,480.567,409.016,10001.0,413.079,480.567,409.016,10001.0,413.97,480.503,408.856,10001.0,223.312,950.161,218.653,10001.0,244.897,247.485,243.357
output_gpu,yolov3_darknet,Fused_Cast_fusion_18240638145374119225_kernel,1001.0,1.246,3.936,1.215,1001.0,1.246,3.967,1.215,10001.0,1.244,9.791,1.215,10001.0,1.244,9.791,1.215,10001.0,1.245,4.032,1.215,10001.0,1.245,2.944,1.215,10001.0,3.019000,6.304000,2.943000
output_gpu,yolov3_darknet,Fused_Cast_fusion_1496272649680960439_kernel,1001.0,1.543,4.288,1.504,1001.0,1.54,1.952,1.503,10001.0,1.54,4.512,1.503,10001.0,1.54,4.512,1.503,10001.0,1.541,8.832,1.503,10001.0,1.325,1.728,1.311,10001.0,3.322000,3.871000,3.263000
output_gpu,yolov3_darknet,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.422,1.952,1.376,10001.0,1.421,3.52,1.4069999999999998,10001.0,1.421,3.52,1.4069999999999998,10001.0,1.421,3.488,1.376,10001.0,1.432,1.984,1.407,10001.0,3.061000,3.840000,3.007000
output_gpu,yolov3_darknet,Fused_Mul_Maximum_split_6055034965176675136_kernel,1001.0,24.254,30.527,23.296,1001.0,24.358,685.24,23.007,10001.0,23.969,30.943,22.783,10001.0,23.969,30.943,22.783,10001.0,24.14,30.815,23.008000000000003,10001.0,37.162,40.031,36.255,10001.0,26.3,31.230999999999998,24.768000
output_gpu,yolov3_darknet,Fused_Cast_fusion_225784922328306707_kernel,1001.0,2.382,5.247999999999999,2.303,1001.0,2.38,5.247999999999999,2.303,10001.0,2.378,5.376,2.272,10001.0,2.378,5.376,2.272,10001.0,2.377,5.312,2.271,10001.0,3.522,4.0,3.328,10001.0,4.151000,4.768000,4.063000
output_gpu,yolov3_darknet,Fused_Transpose_fusion_17611905813747994326_kernel,1001.0,242.061,259.03700000000003,231.965,1001.0,206.16,915.797,204.765,10001.0,210.034,243.164,205.116,10001.0,210.034,243.164,205.116,10001.0,209.136,242.939,205.116,10001.0,111.433,738.453,110.43,10001.0,125.06600000000002,127.51899999999999,123.967
output_gpu,yolov3_darknet,Fused_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_5379635622187716337_kernel,1001.0,147.08,161.47,141.054,1001.0,41.567,43.743,40.768,10001.0,42.729000000000006,51.935,40.639,10001.0,42.729000000000006,51.935,40.639,10001.0,45.397,53.087,41.919,10001.0,24.775,29.088,24.224,10001.0,29.084000,32.703000,28.288000
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_10130568491166108035_kernel,1001.0,677.623,704.503,659.8639999999999,1001.0,193.24400000000003,196.893,192.285,10001.0,196.104,228.859,192.028,10001.0,196.104,228.859,192.028,10001.0,196.676,229.02,191.74,10001.0,78.332,90.911,75.359,10001.0,84.40899999999999,89.727,83.103
output_gpu,yolov3_darknet,Fused_Transpose_split_6969201987078612623_kernel,1001.0,45.344,49.75899999999999,44.511,1001.0,40.453,42.72,39.583,10001.0,43.014,48.895,39.647,10001.0,43.014,48.895,39.647,10001.0,43.427,50.303,39.679,10001.0,27.233,574.391,24.896,10001.0,30.114000,32.608000,27.712000
output_gpu,yolov3_darknet,Fused_Cast_fusion_7386713139835719987_kernel,1001.0,3.649,6.527,3.584,1001.0,3.642,4.256,3.584,10001.0,3.636,6.56,3.583,10001.0,3.636,6.56,3.583,10001.0,3.636,6.496,3.583,10001.0,5.461,5.92,5.183,10001.0,5.857000,6.912000,5.759000
output_gpu,yolov3_darknet,Fused_Cast_BiasAdd_Transpose_fusion_17774518515340909332_kernel,1001.0,16.974,19.904,16.671,1001.0,16.962,17.44,16.608,10001.0,16.258,19.872,15.072,10001.0,16.258,19.872,15.072,10001.0,16.074,19.68,15.008,10001.0,12.385,13.024,12.16,10001.0,13.699000,15.296000,13.408000
output_gpu,yolov3_darknet,Fused_Cast_Transpose_split_16762074751097610432_kernel,1001.0,11.017,21.344,10.719,1001.0,14.139,16.448,13.664,10001.0,14.021,18.303,13.375,10001.0,14.021,18.303,13.375,10001.0,14.124,18.399,13.024,10001.0,10.403,12.32,9.888,10001.0,12.709000,15.137000,11.552000
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_10272695155965646884_kernel,1001.0,222.61900000000003,231.55,221.30900000000003,1001.0,39.798,47.007,39.2,10001.0,38.382,45.568000000000005,37.183,10001.0,38.382,45.568000000000005,37.183,10001.0,38.812,45.728,37.343,10001.0,37.094,41.023,36.064,10001.0,39.18,43.2,38.368
output_gpu,yolov3_darknet,Fused_Transpose_split_17490125114700437933_kernel,1001.0,32.024,35.232,31.68,1001.0,18.626,22.143,18.272,10001.0,18.549,21.664,16.608,10001.0,18.549,21.664,16.608,10001.0,17.913,21.632,16.544,10001.0,10.522,11.423,10.24,10001.0,13.654000,14.848000,12.608000
output_gpu,yolov3_darknet,Fused_Cast_fusion_15970902786901184705_kernel,1001.0,1.668,4.448,1.632,1001.0,1.665,2.208,1.631,10001.0,1.664,4.48,1.631,10001.0,1.664,4.48,1.631,10001.0,1.665,6.56,1.631,10001.0,1.669,4.704,1.631,10001.0,3.451000,4.032000,3.391000
output_gpu,yolov3_darknet,Fused_Transpose_split_4144845131523122819_kernel,1001.0,211.259,218.845,209.373,1001.0,70.748,811.446,68.991,10001.0,64.78,73.279,61.023,10001.0,64.78,73.279,61.023,10001.0,65.384,73.66300000000003,61.023,10001.0,52.104,58.335,48.703,10001.0,55.819,59.712,53.12
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_12404958877077454904_kernel,1001.0,1110.897,1221.777,1093.17,1001.0,890.6469999999999,1576.909,887.0930000000001,10001.0,894.015,1042.797,889.3589999999999,10001.0,894.015,1042.797,889.3589999999999,10001.0,892.2410000000001,1041.773,888.9119999999999,10001.0,902.887,903.762,902.29,10001.0,1005.6,1113.6870000000001,1001.943
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_13329413020652159099_kernel,1001.0,130.249,133.08700000000002,129.534,1001.0,130.429,135.167,129.662,10001.0,115.336,133.213,111.101,10001.0,115.336,133.213,111.101,10001.0,114.315,133.02200000000002,110.782,10001.0,80.406,643.8620000000001,76.767,10001.0,87.143000,88.543000,86.879000
output_gpu,yolov3_darknet,Fused_Transpose_split_17611905813747994326_kernel,1001.0,236.722,259.933,227.837,1001.0,236.52,900.47,217.79,10001.0,209.031,242.78,205.02,10001.0,209.031,242.78,205.02,10001.0,208.474,243.323,204.988,10001.0,115.059,130.718,110.526,10001.0,127.027,140.47899999999998,124.28699999999999
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_17980545714271336211_kernel,1001.0,498.827,545.273,467.674,1001.0,439.161,1153.138,437.147,10001.0,441.811,514.903,438.072,10001.0,441.811,514.903,438.072,10001.0,441.22,515.062,438.04,10001.0,448.318,1145.775,447.609,10001.0,498.901,500.059,498.267
output_gpu,yolov3_darknet,Fused_Add_fusion_5261878416554002092_kernel,1001.0,90.759,95.743,89.95100000000001,1001.0,90.739,96.414,89.727,10001.0,88.571,96.126,87.455,10001.0,88.571,96.126,87.455,10001.0,88.87299999999998,96.19,87.48599999999998,10001.0,94.815,98.046,91.486,10001.0,94.061,99.839,92.319
output_gpu,yolov3_darknet,Fused_Cast_split_13990087073944920468_kernel,1001.0,190.614,196.669,189.246,1001.0,190.449,194.49400000000003,189.181,10001.0,189.482,196.508,188.413,10001.0,189.482,196.508,188.413,10001.0,189.523,196.7,188.381,10001.0,175.239,179.06900000000002,173.63,10001.0,192.885,198.36599999999999,191.806
output_gpu,yolov3_darknet,Fused_Add_split_12127065547842097563_kernel,1001.0,176.783,183.23,173.726,1001.0,175.127,886.965,172.733,10001.0,173.905,183.645,172.34799999999998,10001.0,173.905,183.645,172.34799999999998,10001.0,173.921,183.581,172.38099999999997,10001.0,188.576,892.6579999999999,180.766,10001.0,177.651,181.054,176.22199999999998
output_gpu,yolov3_darknet,Fused_Cast_Transpose_fusion_3528709108505838205_kernel,1001.0,2.04,5.056,2.015,1001.0,2.1,2.624,2.048,10001.0,2.0980000000000003,5.152,2.047,10001.0,2.0980000000000003,5.152,2.047,10001.0,2.097,5.152,2.048,10001.0,1.782,2.112,1.728,10001.0,3.682000,4.256000,3.615000
output_gpu,yolov3_darknet,Fused_Neg_Exp_Add_RealDiv_Sub_BroadcastTo_Mul_Mul_split_13837383422900689501_kernel,1001.0,85.95700000000001,91.199,85.215,1001.0,80.51899999999999,87.262,79.455,10001.0,80.388,87.166,79.102,10001.0,80.388,87.166,79.102,10001.0,80.35499999999998,87.07,78.97500000000002,10001.0,80.48700000000001,854.259,79.00699999999999,10001.0,82.478,87.008,81.18400000000001
output_gpu,yolov3_darknet,Fused_Transpose_split_15362256644294503984_kernel,1001.0,677.29,684.9830000000001,671.959,1001.0,276.501,283.133,259.197,10001.0,253.395,284.123,248.05900000000003,10001.0,253.395,284.123,248.05900000000003,10001.0,254.652,284.187,248.635,10001.0,177.755,195.421,173.181,10001.0,188.673,192.83,186.494
output_gpu,ResNet101,Fused_Cast_split_8351751667762880221_kernel,1001.0,1.237,3.936,1.215,1001.0,1.234,1.6,1.215,10001.0,1.235,4.0,1.215,10001.0,1.235,4.0,1.215,10001.0,1.234,4.0,1.215,10001.0,1.065,1.408,1.023,10001.0,2.991000,3.488000,2.943000
output_gpu,ResNet101,Fused_ReduceSum_split_12450087900310959907_kernel,1001.0,3.165,6.207999999999999,3.135,1001.0,3.162,3.936,3.135,10001.0,1.867,4.6080000000000005,1.824,10001.0,1.867,4.6080000000000005,1.824,10001.0,1.597,4.383,1.567,10001.0,1.372,1.696,1.343,10001.0,3.310000,3.808000,3.263000
output_gpu,ResNet101,Fused_Transpose_split_9316999070151804950_kernel,1001.0,47.767,51.103,47.295,1001.0,47.307,49.151,46.847,10001.0,44.666,51.167,40.99100000000001,10001.0,44.666,51.167,40.99100000000001,10001.0,44.409,51.615,40.959,10001.0,32.564,37.28,30.048,10001.0,34.971000000000004,38.944,33.536
output_gpu,ResNet101,Fused_Cast_split_12477723516016570265_kernel,1001.0,1.662,4.416,1.631,1001.0,1.658,2.112,1.631,10001.0,1.658,4.48,1.631,10001.0,1.658,4.48,1.631,10001.0,1.657,4.64,1.631,10001.0,1.511,2.048,1.471,10001.0,3.427000,4.353000,3.359000
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_13531162913956986807_kernel,1001.0,2.22,5.376,2.176,1001.0,2.262,2.688,2.208,10001.0,2.263,5.12,2.208,10001.0,2.263,5.12,2.208,10001.0,2.26,5.088,2.208,10001.0,1.995,5.248,1.951,10001.0,3.732000,4.352000,3.679000
output_gpu,ResNet101,Fused_Cast_split_3210942441325238138_kernel,1001.0,1.422,4.288,1.4069999999999998,1001.0,1.419,1.7919999999999998,1.376,10001.0,1.419,4.256,1.376,10001.0,1.419,4.256,1.376,10001.0,1.423,9.184,1.376,10001.0,1.421,1.824,1.407,10001.0,3.378000,3.968000,3.295000
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_6229241650554322839_kernel,1001.0,44.517,50.07899999999999,43.071000000000005,1001.0,40.392,43.52,39.84,10001.0,39.74500000000001,45.599,38.464,10001.0,39.74500000000001,45.599,38.464,10001.0,39.799,45.823,38.591,10001.0,49.318,53.503,46.879,10001.0,53.620000000000005,58.590999999999994,51.742999999999995
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_17884474370717013435_kernel,1001.0,1.927,4.736000000000001,1.887,1001.0,1.31,4.512,1.28,10001.0,1.307,4.095,1.279,10001.0,1.307,4.095,1.279,10001.0,1.307,4.096,1.279,10001.0,1.308,1.664,1.279,10001.0,3.276000,3.807000,3.199000
output_gpu,ResNet101,Fused_Sub_Exp_ReduceSum_split_883961644439154119_kernel,1001.0,4.837,7.711,4.799,1001.0,4.836,5.472,4.799,10001.0,2.8510000000000004,6.08,2.815,10001.0,2.8510000000000004,6.08,2.815,10001.0,2.853,6.143,2.815,10001.0,2.446,3.264,2.399,10001.0,4.528000,5.472000,4.448000
output_gpu,ResNet101,Fused_Cast_split_16560149568435435788_kernel,1001.0,6.372999999999999,9.536,6.176,1001.0,6.323,7.392,6.176,10001.0,6.372000000000001,9.792,6.144,10001.0,6.372000000000001,9.792,6.144,10001.0,6.372,9.727,6.144,10001.0,9.501,12.639,9.151,10001.0,9.013000,10.080000,8.672000
output_gpu,ResNet101,Fused_Cast_split_7386713139835719987_kernel,1001.0,3.639,6.464,3.584,1001.0,3.636,4.096,3.583,10001.0,3.635,6.527,3.583,10001.0,3.635,6.527,3.583,10001.0,3.636,6.7200000000000015,3.583,10001.0,5.447,5.888,5.152,10001.0,5.588000,6.239000,5.503000
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_746009665116862803_kernel,1001.0,112.276,121.087,111.134,1001.0,21.267,24.928,20.767,10001.0,21.269,27.52,20.22300000000001,10001.0,21.269,27.52,20.22300000000001,10001.0,20.922,27.583,19.871,10001.0,19.585,23.744,18.72,10001.0,21.818,26.592000000000002,20.800000
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_10572641205232097944_kernel,1001.0,5.587999999999999,8.704,5.502999999999999,1001.0,5.653,9.056,5.568,10001.0,5.626,8.767999999999999,5.535,10001.0,5.626,8.767999999999999,5.535,10001.0,5.626,9.024,5.535,10001.0,3.406,6.624,3.359,10001.0,5.143000,5.824000,5.055000
output_gpu,ResNet101,Fused_Cast_Transpose_fusion_7346313530950028297_kernel,1001.0,1.649,4.384,1.6,1001.0,1.715,2.176,1.664,10001.0,1.711,4.512,1.664,10001.0,1.711,4.512,1.664,10001.0,1.711,4.608,1.663,10001.0,1.478,1.888,1.439,10001.0,3.480000,4.000000,3.423000
output_gpu,ResNet101,Fused_Cast_Transpose_split_10572641205232097944_kernel,1001.0,5.589,8.96,5.472,1001.0,5.65,6.272,5.5360000000000005,10001.0,5.629,8.8,5.535,10001.0,5.629,8.8,5.535,10001.0,5.625,8.8,5.535,10001.0,3.409,6.784,3.359,10001.0,5.119000,5.760000,5.055000
output_gpu,ResNet101,Fused_Cast_split_8294321680628880039_kernel,1001.0,18.178,23.744,17.727999999999998,1001.0,18.169,21.056,17.791,10001.0,18.173,23.424,17.791,10001.0,18.173,23.424,17.791,10001.0,18.171,23.552,17.76,10001.0,19.543000000000003,22.911,18.272,10001.0,20.093000,23.296,19.584000
output_gpu,ResNet101,Fused_Cast_split_6666346426345841937_kernel,1001.0,3.662,6.432,3.615,1001.0,3.652,4.064,3.615,10001.0,3.635,6.528,3.583,10001.0,3.635,6.528,3.583,10001.0,3.634,6.592,3.583,10001.0,5.452,5.984,5.119,10001.0,5.708000,6.368000,5.631000
output_gpu,ResNet101,Fused_Cast_split_11738288552156941409_kernel,1001.0,1.661,4.448,1.631,1001.0,1.666,4.768,1.631,10001.0,1.6569999999999998,4.64,1.631,10001.0,1.6569999999999998,4.64,1.631,10001.0,1.657,4.512,1.631,10001.0,1.511,1.888,1.471,10001.0,3.433000,4.032000,3.359000
output_gpu,ResNet101,Fused_Mul_split_9332101872896932291_kernel,1001.0,1.371,3.392,1.3430000000000002,1001.0,1.369,1.952,1.3430000000000002,10001.0,1.369,3.616,1.3430000000000002,10001.0,1.369,3.616,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.177,1.664,1.151,10001.0,3.013000,3.712000,2.944000
output_gpu,ResNet101,Fused_Cast_Transpose_split_13531162913956986807_kernel,1001.0,2.221,5.343999999999999,2.175,1001.0,2.28,5.343999999999999,2.239,10001.0,2.26,5.152,2.208,10001.0,2.26,5.152,2.208,10001.0,2.26,5.12,2.208,10001.0,1.988,8.672,1.951,10001.0,3.730000,4.672000,3.679000
output_gpu,ResNet101,Fused_Cast_split_15050971751546069948_kernel,1001.0,2.1180000000000003,5.184,2.079,1001.0,2.116,2.752,2.08,10001.0,2.114,5.12,2.079,10001.0,2.114,5.12,2.079,10001.0,2.114,5.088,2.079,10001.0,1.923,2.528,1.887,10001.0,4.178000,4.864000,4.127000
output_gpu,ResNet101,Fused_Cast_split_8131585293552244066_kernel,1001.0,6.388999999999999,9.44,6.207999999999999,1001.0,6.33,7.072,6.144,10001.0,6.37,9.568,6.144,10001.0,6.37,9.568,6.144,10001.0,6.38,9.696,6.175,10001.0,9.5,10.4,9.151,10001.0,8.902000,9.888000,8.031000
output_gpu,ResNet101,Fused_Reshape_Tile_RealDiv_Transpose_fusion_8035263416297224545_kernel,1001.0,10.424,14.528,10.208,1001.0,15.552,19.903,14.432,10001.0,15.345999999999998,21.856,12.992,10001.0,15.345999999999998,21.856,12.992,10001.0,15.52,22.56,14.175999999999998,10001.0,22.479,23.551,21.856,10001.0,17.4,21.887,15.935999999999998
output_gpu,ResNet101,Fused_Cast_fusion_7151560994996638679_kernel,1001.0,17.912,23.2,17.439,1001.0,17.913,21.184,17.44,10001.0,17.905,22.72,17.407,10001.0,17.905,22.72,17.407,10001.0,17.76,23.455,16.96,10001.0,19.335,22.975,18.464,10001.0,19.729,22.656,19.2
output_gpu,ResNet101,Fused_Cast_split_13313251452619632340_kernel,1001.0,2.386,5.216,2.303,1001.0,2.384,2.88,2.303,10001.0,2.381,5.279,2.272,10001.0,2.381,5.279,2.272,10001.0,2.383,5.28,2.272,10001.0,3.198,3.584,3.072,10001.0,4.489000,5.152000,4.383000
output_gpu,ResNet101,Fused_Cast_split_3875502119377946490_kernel,1001.0,2.385,5.216,2.3040000000000003,1001.0,2.384,2.912,2.3040000000000003,10001.0,2.382,5.28,2.303,10001.0,2.382,5.28,2.303,10001.0,2.382,5.248,2.303,10001.0,3.51,3.936,3.391,10001.0,4.384000,5.056000,4.287000
output_gpu,ResNet101,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.371,3.392,1.3430000000000002,1001.0,1.423,3.424,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.52,1.407,10001.0,1.432,3.488,1.407,10001.0,3.057000,3.744000,3.007000
output_gpu,ResNet101,Fused_Cast_split_1481848414456064298_kernel,1001.0,2.1180000000000003,4.96,2.08,1001.0,2.116,2.72,2.08,10001.0,2.113,5.216,2.079,10001.0,2.113,5.216,2.079,10001.0,2.113,5.024,2.079,10001.0,1.926,2.368,1.887,10001.0,4.193000,4.896000,4.127000
output_gpu,ResNet101,Fused_Cast_split_8053132762655673933_kernel,1001.0,1.294,4.096,1.279,1001.0,1.2919999999999998,1.664,1.279,10001.0,1.291,4.128,1.247,10001.0,1.291,4.128,1.247,10001.0,1.292,4.064,1.247,10001.0,1.291,1.664,1.247,10001.0,3.140000,3.712000,3.071000
output_gpu,ResNet101,Fused_Cast_split_6311674897489810490_kernel,1001.0,1.293,4.128,1.248,1001.0,1.2919999999999998,1.664,1.248,10001.0,1.2919999999999998,4.192,1.248,10001.0,1.2919999999999998,4.192,1.248,10001.0,1.292,4.416,1.248,10001.0,1.292,2.016,1.248,10001.0,3.213000,4.416000,3.136000
output_gpu,RetinaFace_ResNet50,Fused_ReduceMax_split_12824154059640471129_kernel,1001.0,355.57800000000003,385.371,329.755,1001.0,361.175,383.099,329.755,10001.0,4.319,7.392,4.287,10001.0,4.319,7.392,4.287,10001.0,2.93,8.576,2.879,10001.0,2.96,13.311,2.911,10001.0,4.662000,5.312000,4.607000
output_gpu,RetinaFace_ResNet50,Fused_Sub_Exp_ReduceSum_Sub_Log_Add_Reshape_Sub_Reshape_Mul_split_1086739347885138236_kernel,1001.0,364.829,386.395,330.651,1001.0,330.579,331.195,330.459,10001.0,11.68,16.991,10.752,10001.0,11.68,16.991,10.752,10001.0,11.539,16.544,10.784,10001.0,11.525,16.224,10.624,10001.0,13.681000,17.184000,12.704000
output_gpu,RetinaFace_ResNet50,Fused_AddN_8897683045701803071_kernel,1001.0,449.023,452.251,445.05,1001.0,449.383,452.762,445.146,10001.0,450.732,453.015,445.6230000000001,10001.0,450.732,453.015,445.6230000000001,10001.0,450.696,452.824,445.656,10001.0,442.506,447.482,439.259,10001.0,450.95000000000005,454.62,448.412
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_Add_fusion_436169102862513836_kernel,1001.0,119.066,125.278,118.303,1001.0,119.069,126.014,118.206,10001.0,119.083,125.18099999999998,118.109,10001.0,119.083,125.18099999999998,118.109,10001.0,119.092,125.022,117.886,10001.0,117.878,123.743,116.863,10001.0,121.359,125.18299999999999,120.255
output_gpu,RetinaFace_ResNet50,Fused_Add_MaximumGrad_Mul_Add_fusion_914968364133825172_kernel,1001.0,11.543,13.28,10.688,1001.0,11.513,12.224,10.688,10001.0,11.519,13.248,10.528,10001.0,11.519,13.248,10.528,10001.0,11.513,13.28,10.527,10001.0,10.132,11.168,8.896,10001.0,13.669000,15.263000,12.575000
output_gpu,RetinaFace_ResNet50,Fused_AddN_4295269851325086236_kernel,1001.0,113.434,119.199,112.606,1001.0,114.227,117.502,113.375,10001.0,114.08,119.134,112.606,10001.0,114.08,119.134,112.606,10001.0,114.107,120.126,112.606,10001.0,113.287,116.319,112.287,10001.0,115.866,119.647,114.655
output_gpu,RetinaFace_ResNet50,Fused_MaximumGrad_Mul_Add_fusion_10097465813327699244_kernel,1001.0,441.278,444.827,437.723,1001.0,442.059,1070.833,437.754,10001.0,442.693,444.887,437.784,10001.0,442.693,444.887,437.784,10001.0,442.779,445.048,437.592,10001.0,434.417,1169.457,428.058,10001.0,442.818,446.716,441.14799999999997
output_gpu,RetinaFace_ResNet50,Fused_Reshape_BroadcastTo_Mul_BroadcastTo_Mul_split_7571614325500164859_kernel,1001.0,353.135,385.947,330.236,1001.0,17.44,21.312,16.544,10001.0,17.438,23.935,16.448,10001.0,17.438,23.935,16.448,10001.0,17.434,24.832,16.544,10001.0,17.454,24.383,16.448,10001.0,18.742000,22.944,17.855000
output_gpu,RetinaFace_ResNet50,Fused_AddN_MaximumGrad_Mul_Add_fusion_11496463622957835650_kernel,1001.0,172.55599999999998,178.68599999999998,171.294,1001.0,173.767,178.30200000000002,172.765,10001.0,173.56,179.165,171.32500000000005,10001.0,173.56,179.165,171.32500000000005,10001.0,173.588,179.517,171.453,10001.0,171.857,177.566,169.69400000000002,10001.0,174.829,179.551,172.734
output_gpu,RetinaFace_ResNet50,Fused_Maximum_RealDiv_RealDiv_RealDiv_split_7798465354901567530_kernel,1001.0,1.686,3.968,1.663,1001.0,1.6869999999999998,4.0,1.663,10001.0,1.685,7.84,1.663,10001.0,1.685,7.84,1.663,10001.0,1.684,3.904,1.663,10001.0,1.684,2.368,1.663,10001.0,3.293000,4.288000,3.231000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Mul_Reshape_Neg_split_4752622243029409788_kernel,1001.0,368.246,385.5630000000001,348.219,1001.0,2.929,3.968,2.847,10001.0,2.937,6.303999999999999,2.8160000000000003,10001.0,2.937,6.303999999999999,2.8160000000000003,10001.0,2.926,6.336,2.815,10001.0,2.943,9.824,2.816,10001.0,5.036000,5.760000,4.863000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_7525082745073579616_kernel,1001.0,4.743,7.744,4.544,1001.0,4.736000000000001,5.343999999999999,4.543,10001.0,4.74,7.712000000000001,4.544,10001.0,4.74,7.712000000000001,4.544,10001.0,4.739,7.744,4.512,10001.0,3.252,3.936,3.135,10001.0,6.374000,7.200000,6.175000
output_gpu,RetinaFace_ResNet50,Fused_MaximumGrad_Mul_Add_fusion_12542635580259279829_kernel,1001.0,113.571,119.103,112.67,1001.0,114.315,116.798,113.566,10001.0,114.196,119.358,112.67,10001.0,114.196,119.358,112.67,10001.0,114.171,119.005,112.702,10001.0,113.057,115.55,111.07,10001.0,116.018,119.327,114.815
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_2724281381040789777_kernel,1001.0,26.859,32.319,26.24,1001.0,22.759,26.272,22.144,10001.0,22.706,28.863000000000003,21.823,10001.0,22.706,28.863000000000003,21.823,10001.0,22.688,28.19100000000001,21.824,10001.0,21.906,25.855,21.087000000000003,10001.0,24.445999999999998,28.608,23.392
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_2401022420578777103_kernel,1001.0,1.662,4.5760000000000005,1.631,1001.0,1.673,2.112,1.632,10001.0,1.6740000000000002,4.512,1.632,10001.0,1.6740000000000002,4.512,1.632,10001.0,1.673,4.544,1.632,10001.0,1.685,9.887,1.663,10001.0,3.464000,4.000000,3.391000
output_gpu,RetinaFace_ResNet50,Fused_BroadcastTo_Mul_split_9027761569946414568_kernel,1001.0,362.147,385.5630000000001,329.755,1001.0,4.133,4.8,4.032,10001.0,4.121,7.199,4.032,10001.0,4.121,7.199,4.032,10001.0,4.106,7.36,4.0,10001.0,4.115,7.616,4.031,10001.0,5.861000,6.528000,5.759000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_Add_fusion_9917388029749986038_kernel,1001.0,461.55300000000005,467.802,460.41,1001.0,462.323,1130.032,460.505,10001.0,461.709,467.543,460.1830000000001,10001.0,461.709,467.543,460.1830000000001,10001.0,461.736,467.639,460.216,10001.0,456.131,462.298,454.298,10001.0,463.37,467.99600000000004,461.78799999999995
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_16543279096509163052_kernel,1001.0,348.389,354.555,347.163,1001.0,348.19800000000004,351.835,347.227,10001.0,348.19,353.81699999999995,346.809,10001.0,348.19,353.81699999999995,346.809,10001.0,348.592,353.88199999999995,347.098,10001.0,357.115,363.86800000000005,352.95500000000004,10001.0,351.39300000000003,355.516,349.917
output_gpu,RetinaFace_ResNet50,Fused_AddN_MaximumGrad_Mul_Add_fusion_17729536345969579331_kernel,1001.0,46.42100000000001,53.247,45.823,1001.0,46.572,51.136,45.983,10001.0,46.524,53.215,45.75899999999999,10001.0,46.524,53.215,45.75899999999999,10001.0,46.482,53.631,45.75899999999999,10001.0,46.377,50.688,45.568,10001.0,48.524,53.439,47.711999999999996
output_gpu,RetinaFace_ResNet50,Fused_MaximumGrad_Mul_Add_fusion_1544652197274835328_kernel,1001.0,30.38,36.287,29.471,1001.0,30.364,35.935,29.728,10001.0,30.326,35.74299999999999,29.567,10001.0,30.326,35.74299999999999,29.567,10001.0,30.34,35.775,29.535,10001.0,30.224,33.664,29.472,10001.0,32.800000,35.999000,31.712000
output_gpu,RetinaFace_ResNet50,Fused_AddN_2260910771692391499_kernel,1001.0,440.279,444.378,437.115,1001.0,442.476,446.137,441.305,10001.0,442.24,444.59900000000016,437.08,10001.0,442.24,444.59900000000016,437.08,10001.0,442.357,444.759,437.36800000000005,10001.0,434.455,1160.273,428.283,10001.0,442.353,446.044,440.924
output_gpu,RetinaFace_ResNet50,Fused_MaximumGrad_Mul_Add_fusion_1032472401514225041_kernel,1001.0,111.912,117.598,111.007,1001.0,112.678,116.99,111.902,10001.0,112.501,117.854,111.038,10001.0,112.501,117.854,111.038,10001.0,112.516,117.534,110.974,10001.0,110.916,824.4050000000001,109.79,10001.0,114.408,118.42999999999999,113.247
output_gpu,RetinaFace_ResNet50,Fused_Maximum_RealDiv_Mul_Add_Maximum_RealDiv_Add_split_7684903393753447337_kernel,1001.0,1.655,3.648,1.631,1001.0,1.654,2.3040000000000003,1.631,10001.0,1.653,3.712,1.631,10001.0,1.653,3.712,1.631,10001.0,1.653,3.84,1.631,10001.0,1.654,3.68,1.631,10001.0,3.408000,4.224000,3.327000
output_gpu,RetinaFace_ResNet50,Fused_Mul_fusion_13296463481088049281_kernel,1001.0,1.3869999999999998,3.36,1.344,1001.0,1.3869999999999998,3.488,1.344,10001.0,1.385,3.488,1.3430000000000002,10001.0,1.385,3.488,1.3430000000000002,10001.0,1.386,3.456,1.344,10001.0,1.191,1.76,1.151,10001.0,3.025000,3.712000,2.975000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_7332690976026815438_kernel,1001.0,87.61200000000001,94.687,86.84700000000001,1001.0,87.62200000000001,93.983,86.91,10001.0,87.35700000000001,94.206,86.43,10001.0,87.35700000000001,94.206,86.43,10001.0,87.52799999999998,94.302,86.654,10001.0,89.553,93.567,88.382,10001.0,89.56400000000001,93.82300000000001,88.51100000000001
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_3116175346258748967_kernel,1001.0,12.027,15.104,11.872,1001.0,5.393,5.824,5.152,10001.0,5.388999999999999,8.448,5.12,10001.0,5.388999999999999,8.448,5.12,10001.0,5.391,8.544,5.152,10001.0,3.887,9.023,3.744,10001.0,6.209000,7.360000,6.016000
output_gpu,RetinaFace_ResNet50,Fused_Neg_Reshape_Cast_Cast_Mul_Minimum_Reshape_BroadcastTo_Less_Cast_Add_Minimu_more_split_17239937682889967902_kernel,1001.0,356.384,385.755,329.97900000000004,1001.0,4.234,7.903,4.0,10001.0,4.214,7.648,3.936,10001.0,4.214,7.648,3.936,10001.0,4.216,7.808,3.968,10001.0,3.626,9.344,3.424,10001.0,5.839000,6.817000,5.567000
output_gpu,RetinaFace_ResNet50,Fused_ReduceSum_split_3366192580175348419_kernel,1001.0,237.567,242.909,218.717,1001.0,206.95,207.677,206.813,10001.0,2.697,5.92,2.655,10001.0,2.697,5.92,2.655,10001.0,2.697,6.048,2.655,10001.0,2.709,3.552,2.655,10001.0,4.391000,5.312000,4.319000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_15972352060997974967_kernel,1001.0,2.533,5.568,2.495,1001.0,2.176,2.592,2.143,10001.0,2.522,5.568,2.495,10001.0,2.522,5.568,2.495,10001.0,2.522,5.664,2.495,10001.0,2.271,8.64,2.08,10001.0,4.018000,4.512000,3.808000
output_gpu,RetinaFace_ResNet50,Fused_Cast_fusion_14434894943373778927_kernel,1001.0,368.111,385.211,348.219,1001.0,2.397,2.944,2.3040000000000003,10001.0,2.398,5.408,2.272,10001.0,2.398,5.408,2.272,10001.0,2.398,5.408,2.272,10001.0,2.414,2.912,2.304,10001.0,4.238000,4.736000,4.127000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_6686217647084275953_kernel,1001.0,2.045,4.864,2.015,1001.0,2.029,2.432,1.984,10001.0,2.032,9.344,1.984,10001.0,2.032,9.344,1.984,10001.0,2.028,4.896,1.984,10001.0,1.923,5.92,1.887,10001.0,3.670000,4.352000,3.615000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_5083726075392912246_kernel,1001.0,6.346,9.216,6.176,1001.0,5.725,6.464,5.44,10001.0,5.732,8.767999999999999,5.408,10001.0,5.732,8.767999999999999,5.408,10001.0,5.728,8.992,5.408,10001.0,6.426,12.736,6.016,10001.0,7.908000,8.832000,7.456000
output_gpu,RetinaFace_ResNet50,Fused_BroadcastTo_Mul_Maximum_RealDiv_Reshape_BroadcastTo_Mul_split_12442299025705298514_kernel,1001.0,48.258,53.919,47.007,1001.0,42.178,45.92,41.343,10001.0,42.18,48.255,41.279,10001.0,42.18,48.255,41.279,10001.0,42.193000000000005,47.96700000000001,41.247,10001.0,42.276,722.3589999999999,41.216,10001.0,44.451,48.127,43.456
output_gpu,RetinaFace_ResNet50,Fused_AddN_3317398860531744399_kernel,1001.0,31.547,36.384,30.847,1001.0,31.568,34.783,30.879,10001.0,31.494,36.736,30.559,10001.0,31.494,36.736,30.559,10001.0,31.584,36.831,30.719,10001.0,31.141,37.952,30.432,10001.0,33.870999999999995,36.703,32.063
output_gpu,RetinaFace_ResNet50,Fused_Sub_Exp_ReduceSum_Log_split_11941850547297653662_kernel,1001.0,349.982,385.755,329.88300000000004,1001.0,359.547,900.2760000000001,329.88300000000004,10001.0,5.774,9.152,5.502999999999999,10001.0,5.774,9.152,5.502999999999999,10001.0,5.695,8.864,5.439,10001.0,5.703,13.184,5.408,10001.0,7.880000,8.992000,7.584000
output_gpu,RetinaFace_ResNet50,Fused_AddN_10437846869483146613_kernel,1001.0,880.697,884.885,872.757,1001.0,881.465,884.7869999999999,873.1389999999999,10001.0,882.7489999999999,885.2950000000002,872.9110000000001,10001.0,882.7489999999999,885.2950000000002,872.9110000000001,10001.0,882.9060000000001,885.52,873.2639999999999,10001.0,864.818,1595.626,852.533,10001.0,879.25,882.1999999999999,871.3530000000001
output_gpu,RetinaFace_ResNet50,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.42,1.952,1.376,10001.0,1.42,3.488,1.4069999999999998,10001.0,1.42,3.488,1.4069999999999998,10001.0,1.421,3.488,1.407,10001.0,1.229,1.728,1.215,10001.0,3.076000,3.777000,3.007000
output_gpu,RetinaFace_ResNet50,Fused_Add_MaximumGrad_Mul_Add_fusion_17296858175108675208_kernel,1001.0,554.905,558.681,551.001,1001.0,556.266,1269.518,551.512,10001.0,556.153,559.253,551.254,10001.0,556.153,559.253,551.254,10001.0,556.202,558.9340000000002,551.158,10001.0,549.922,554.1049999999999,546.841,10001.0,555.222,559.739,553.819
output_gpu,RetinaFace_ResNet50,Fused_Reciprocal_Mul_BroadcastTo_Mul_split_1965900446373736464_kernel,1001.0,356.767,385.659,329.94699999999995,1001.0,4.835,5.504,4.672,10001.0,4.848,8.288,4.703,10001.0,4.848,8.288,4.703,10001.0,4.841,8.032,4.703,10001.0,4.879,9.184,4.736,10001.0,7.157000,8.320000,7.008000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_16732347805501610880_kernel,1001.0,2.393,5.247999999999999,2.336,1001.0,2.387,5.408,2.3040000000000003,10001.0,2.383,5.28,2.3040000000000003,10001.0,2.383,5.28,2.3040000000000003,10001.0,2.38,5.248,2.303,10001.0,2.297,9.6,2.239,10001.0,4.062000,4.640000,3.967000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_split_3590132217549810569_kernel,1001.0,360.702,385.307,329.756,1001.0,2.591,3.104,2.496,10001.0,2.593,6.112,2.495,10001.0,2.593,6.112,2.495,10001.0,2.597,8.576,2.496,10001.0,2.593,9.76,2.495,10001.0,4.402000,4.960000,4.224000
output_gpu,RetinaFace_ResNet50,Fused_NotEqual_Cast_Cast_split_15570820216269115763_kernel,1001.0,341.569,385.24300000000005,329.787,1001.0,2.77,3.232,2.688,10001.0,2.773,6.047000000000001,2.656,10001.0,2.773,6.047000000000001,2.656,10001.0,2.772,5.728,2.656,10001.0,2.385,9.76,2.303,10001.0,4.886000,5.632000,4.736000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_890968573007231518_kernel,1001.0,89.934,96.287,89.023,1001.0,90.346,777.2689999999999,88.926,10001.0,89.897,95.87,88.99,10001.0,89.897,95.87,88.99,10001.0,89.64,95.966,88.70200000000001,10001.0,91.983,97.727,90.686,10001.0,91.793,96.671,90.751
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_9239820615820299907_kernel,1001.0,1.4509999999999998,4.416,1.408,1001.0,1.446,2.048,1.408,10001.0,1.443,4.6080000000000005,1.4069999999999998,10001.0,1.443,4.6080000000000005,1.4069999999999998,10001.0,1.443,4.672,1.407,10001.0,1.452,8.928,1.407,10001.0,3.218000,3.840000,3.167000
output_gpu,RetinaFace_ResNet50,Fused_ReduceSum_split_17670503119928074267_kernel,1001.0,342.064,385.339,329.755,1001.0,329.769,330.20300000000003,329.723,10001.0,4.32,9.408,4.287,10001.0,4.32,9.408,4.287,10001.0,2.924,5.984,2.879,10001.0,2.958,9.855,2.911,10001.0,4.657000,5.344000,4.607000
output_gpu,RetinaFace_ResNet50,Fused_Add_MaximumGrad_Mul_Add_fusion_3795171911424909895_kernel,1001.0,37.791,44.511,37.088,1001.0,37.792,44.48,37.055,10001.0,37.801,44.191,36.959,10001.0,37.801,44.191,36.959,10001.0,37.794,44.927,36.928,10001.0,37.517,43.936,36.768,10001.0,39.868000,43.936000,38.879000
output_gpu,RetinaFace_ResNet50,Fused_Sub_Mul_ReduceSum_Exp_split_4971327750864844560_kernel,1001.0,353.26800000000003,386.011,330.396,1001.0,330.529,331.06699999999995,330.36300000000006,10001.0,9.921,14.656,9.343,10001.0,9.921,14.656,9.343,10001.0,10.1,14.879,9.471,10001.0,10.104,14.368,9.504,10001.0,12.112000,14.176000,11.168000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_3327765903680772639_kernel,1001.0,24.417,30.688,23.616,1001.0,24.347,28.512,23.487,10001.0,24.358,30.847,23.36,10001.0,24.358,30.847,23.36,10001.0,24.426,30.943,23.424,10001.0,24.686,28.479,23.84,10001.0,26.401999999999997,30.848,25.376000
output_gpu,RetinaFace_ResNet50,Fused_MaximumGrad_Mul_Add_fusion_12098682142080398856_kernel,1001.0,4.375,7.552,4.16,1001.0,3.769,4.448,3.584,10001.0,4.36,7.744,4.16,10001.0,4.36,7.744,4.16,10001.0,4.361,7.712,4.159,10001.0,3.498,7.103,3.359,10001.0,7.013000,8.448000,6.528000
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Equal_Cast_split_6327894943368238017_kernel,1001.0,688.3,766.71,658.5189999999999,1001.0,3.688,4.384,3.616,10001.0,3.684,6.72,3.584,10001.0,3.684,6.72,3.584,10001.0,3.684,6.752,3.584,10001.0,3.708,9.824,3.615,10001.0,5.423000,6.304000,5.312000
output_gpu,RetinaFace_ResNet50,Fused_Mul_Maximum_fusion_7523821044373415183_kernel,1001.0,23.145,29.599,22.368,1001.0,23.15,28.0,22.463,10001.0,23.156,29.76,22.303,10001.0,23.156,29.76,22.303,10001.0,23.248,30.816,22.463,10001.0,23.575,27.936,22.656,10001.0,25.338,30.624,24.448
output_gpu,RetinaFace_ResNet50,Fused_Reshape_Add_Transpose_split_5567194362285659078_kernel,1001.0,3.89,6.783,3.776,1001.0,3.5380000000000003,4.287,3.4560000000000004,10001.0,3.535,7.456,3.455,10001.0,3.535,7.456,3.455,10001.0,3.534,6.496,3.455,10001.0,4.047,7.072,3.712,10001.0,5.649000,6.720000,5.375000
output_gpu,RetinaFace_ResNet50,Fused_Add_MaximumGrad_Mul_Add_fusion_12714701089245170985_kernel,1001.0,140.25,147.10299999999998,139.358,1001.0,141.189,145.63,140.446,10001.0,141.03799999999998,146.653,139.422,10001.0,141.03799999999998,146.653,139.422,10001.0,141.08399999999995,147.102,139.32500000000002,10001.0,139.986,143.90200000000002,138.846,10001.0,142.754,147.422,141.663
output_gpu,RetinaFace_ResNet50,Fused_ReduceSum_split_15100445699409848623_kernel,1001.0,247.237,257.149,231.005,1001.0,218.74400000000003,219.357,218.621,10001.0,2.699,5.888,2.655,10001.0,2.699,5.888,2.655,10001.0,2.698,5.888,2.655,10001.0,2.708,10.08,2.656,10001.0,4.392000,5.280000,3.456000
output_gpu,Efficientnet,Fused_ReduceSum_split_15456491524569318827_kernel,1001.0,2.182,5.44,2.144,1001.0,2.18,5.504,2.143,10001.0,1.983,4.864,1.951,10001.0,1.983,4.864,1.951,10001.0,1.305,4.032,1.279,10001.0,1.304,2.272,1.279,10001.0,3.267000,3.840000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_3856758043319414934_kernel,1001.0,1.4069999999999998,3.392,1.375,1001.0,1.415,2.048,1.376,10001.0,1.405,3.487,1.375,10001.0,1.405,3.487,1.375,10001.0,1.405,3.52,1.375,10001.0,1.405,1.952,1.375,10001.0,3.265000,3.968000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_18358350490655256049_kernel,1001.0,1.345,4.224,1.311,1001.0,1.342,1.7280000000000002,1.311,10001.0,1.342,4.447,1.311,10001.0,1.342,4.447,1.311,10001.0,1.342,4.192,1.311,10001.0,1.349,9.696,1.311,10001.0,3.322000,3.872000,3.263000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_44043658526539525_kernel,1001.0,165.58,171.39,164.669,1001.0,149.89,156.094,148.703,10001.0,149.607,157.18099999999995,148.541,10001.0,149.607,157.18099999999995,148.541,10001.0,149.559,156.637,148.57299999999998,10001.0,149.567,853.623,148.542,10001.0,152.561,156.958,151.391
output_gpu,Efficientnet,Fused_Mul_fusion_10838140647117891545_kernel,1001.0,1.224,3.936,1.1840000000000002,1001.0,1.226,3.936,1.1840000000000002,10001.0,1.222,4.256,1.183,10001.0,1.222,4.256,1.183,10001.0,1.223,4.0,1.183,10001.0,1.053,1.408,1.023,10001.0,2.952000,3.680000,2.880000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_4668063445090929127_kernel,1001.0,1.244,3.936,1.215,1001.0,1.251,4.288,1.215,10001.0,1.242,4.288,1.215,10001.0,1.242,4.288,1.215,10001.0,1.242,4.064,1.215,10001.0,1.242,1.632,1.215,10001.0,3.192000,4.064000,3.104000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8007365833861578857_kernel,1001.0,1.2919999999999998,4.032,1.248,1001.0,1.332,8.415,1.248,10001.0,1.29,4.32,1.247,10001.0,1.29,4.32,1.247,10001.0,1.29,4.095,1.247,10001.0,1.291,1.663,1.247,10001.0,3.271000,3.840000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_2051207440919143714_kernel,1001.0,1.344,4.096,1.3119999999999998,1001.0,1.344,1.856,1.311,10001.0,1.342,4.448,1.311,10001.0,1.342,4.448,1.311,10001.0,1.342,4.192,1.311,10001.0,1.341,1.76,1.311,10001.0,3.332000,3.872000,3.263000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_17369107602907456092_kernel,1001.0,3.946,7.136,3.903,1001.0,3.97,6.976,3.903,10001.0,14.053,17.344,12.703,10001.0,14.053,17.344,12.703,10001.0,4.252,7.04,4.223,10001.0,4.252,4.608,4.223,10001.0,6.317000,6.848000,6.240000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8672105053581597621_kernel,1001.0,2.227,5.28,2.176,1001.0,2.227,5.312,2.176,10001.0,2.221,5.535,2.175,10001.0,2.221,5.535,2.175,10001.0,2.221,5.248,2.175,10001.0,2.227,2.912,2.175,10001.0,4.326000,5.632000,4.255000
output_gpu,Efficientnet,Fused_Mul_fusion_8800697375006570972_kernel,1001.0,1.244,3.936,1.215,1001.0,1.129,1.504,1.119,10001.0,1.242,4.256,1.215,10001.0,1.242,4.256,1.215,10001.0,1.242,4.0,1.215,10001.0,1.067,1.728,1.024,10001.0,2.968000,3.520000,2.880000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_12733149548290571670_kernel,1001.0,1.431,3.999,1.4069999999999998,1001.0,1.431,4.256,1.4069999999999998,10001.0,1.4280000000000002,4.319,1.4069999999999998,10001.0,1.4280000000000002,4.319,1.4069999999999998,10001.0,1.427,4.064,1.407,10001.0,1.429,9.344,1.407,10001.0,3.044000,3.456000,2.976000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_14799021857610233496_kernel,1001.0,1.46,4.032,1.439,1001.0,1.464,6.912000000000001,1.439,10001.0,1.4580000000000002,4.352,1.439,10001.0,1.4580000000000002,4.352,1.439,10001.0,1.457,4.096,1.439,10001.0,1.457,1.696,1.439,10001.0,3.282000,3.744000,3.231000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_7827774959145374305_kernel,1001.0,1.376,4.096,1.344,1001.0,1.372,1.7280000000000002,1.3430000000000002,10001.0,1.375,8.896,1.3430000000000002,10001.0,1.375,8.896,1.3430000000000002,10001.0,1.373,4.192,1.343,10001.0,1.374,1.856,1.343,10001.0,3.390000,4.096000,3.327000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_12630628420400923972_kernel,1001.0,550.734,561.721,547.257,1001.0,515.665,1454.03,512.025,10001.0,513.798,521.846,508.054,10001.0,513.798,521.846,508.054,10001.0,513.8009999999998,522.102,507.191,10001.0,513.942,1320.145,507.32200000000006,10001.0,516.616,522.2679999999999,514.587
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_17690394410245571852_kernel,1001.0,1.4240000000000002,4.032,1.4069999999999998,1001.0,1.431,3.136,1.4069999999999998,10001.0,1.421,4.32,1.4069999999999998,10001.0,1.421,4.32,1.4069999999999998,10001.0,1.421,4.128,1.407,10001.0,1.421,1.664,1.407,10001.0,3.277000,3.712000,3.200000
output_gpu,Efficientnet,Fused_ReduceSum_split_11554846572247357113_kernel,1001.0,2.307,5.504,2.271,1001.0,2.3080000000000003,5.535,2.271,10001.0,1.895,4.927,1.856,10001.0,1.895,4.927,1.856,10001.0,1.396,4.256,1.375,10001.0,1.422,2.208,1.375,10001.0,3.365000,3.871000,3.295000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_4989191170993967604_kernel,1001.0,1.499,4.224,1.471,1001.0,1.497,1.92,1.471,10001.0,1.496,4.704,1.471,10001.0,1.496,4.704,1.471,10001.0,1.495,4.32,1.471,10001.0,1.286,1.632,1.248,10001.0,3.261000,4.032000,3.199000
output_gpu,Efficientnet,Fused_Mul_split_10531007284226855954_kernel,1001.0,1.369,3.36,1.3430000000000002,1001.0,1.37,3.36,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.369,1.888,1.343,10001.0,3.236000,3.968000,3.167000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_6350445561669819294_kernel,1001.0,569.73,582.072,566.521,1001.0,512.016,520.377,510.8730000000001,10001.0,511.727,520.246,510.6460000000001,10001.0,511.727,520.246,510.6460000000001,10001.0,511.705,520.694,510.71,10001.0,511.869,1276.402,510.714,10001.0,515.756,520.636,514.428
output_gpu,Efficientnet,Fused_ReduceSum_split_13825580897818085376_kernel,1001.0,2.323,5.696000000000001,2.303,1001.0,2.321,2.9760000000000004,2.303,10001.0,1.987,4.96,1.951,10001.0,1.987,4.96,1.951,10001.0,1.326,4.096,1.311,10001.0,1.325,1.696,1.311,10001.0,3.288000,3.936000,3.200000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_17620288604821972172_kernel,1001.0,16.636,19.488,16.511,1001.0,16.629,17.12,16.511,10001.0,13.244000000000002,16.416,12.319,10001.0,13.244000000000002,16.416,12.319,10001.0,4.715,7.84,4.575,10001.0,4.724,7.776,4.576,10001.0,6.686000,7.263000,6.463000
output_gpu,Efficientnet,Fused_ReduceSum_split_5244294228731598086_kernel,1001.0,2.182,5.632000000000001,2.143,1001.0,2.181,5.504,2.144,10001.0,2.001,5.312,1.983,10001.0,2.001,5.312,1.983,10001.0,1.342,4.096,1.311,10001.0,1.342,1.696,1.311,10001.0,3.287000,3.840000,3.231000
output_gpu,Efficientnet,Fused_Mul_fusion_1983163138244450810_kernel,1001.0,1.234,3.904,1.215,1001.0,1.235,4.256,1.215,10001.0,1.233,4.64,1.215,10001.0,1.233,4.64,1.215,10001.0,1.232,4.0,1.215,10001.0,1.059,1.375,1.024,10001.0,2.960000,3.615000,2.911000
output_gpu,Efficientnet,Fused_Sub_split_7448475640211490738_kernel,1001.0,1.227,3.936,1.1840000000000002,1001.0,1.227,3.904,1.1840000000000002,10001.0,1.226,4.256,1.183,10001.0,1.226,4.256,1.183,10001.0,1.226,4.0,1.184,10001.0,1.225,1.6,1.183,10001.0,3.205000,3.744000,3.135000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_7550663849991553288_kernel,1001.0,51.747,56.383,50.848,1001.0,49.609,54.944,48.67100000000001,10001.0,48.424,55.007,46.847,10001.0,48.424,55.007,46.847,10001.0,48.285,54.591,46.911,10001.0,47.698,50.943000000000005,46.848,10001.0,51.591,55.999,50.143
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_12378051260437362614_kernel,1001.0,48.635,53.887,47.967,1001.0,48.628,54.88,47.935,10001.0,48.42100000000001,54.879,46.527,10001.0,48.42100000000001,54.879,46.527,10001.0,48.529,54.303,46.527,10001.0,48.599,747.127,46.527,10001.0,50.895,54.112,49.695
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8642063818402945494_kernel,1001.0,1.245,4.0,1.215,1001.0,1.247,4.416,1.215,10001.0,1.242,4.256,1.215,10001.0,1.242,4.256,1.215,10001.0,1.243,4.0,1.215,10001.0,1.241,1.6,1.215,10001.0,3.187000,3.744000,3.103000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_4676503837012581150_kernel,1001.0,1.401,4.256,1.375,1001.0,1.399,1.76,1.375,10001.0,1.399,4.48,1.375,10001.0,1.399,4.48,1.375,10001.0,1.399,4.192,1.375,10001.0,1.201,1.568,1.183,10001.0,3.170000,3.712000,3.104000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_8559273965687883948_kernel,1001.0,71.307,77.023,70.719,1001.0,66.51,73.15100000000001,65.919,10001.0,66.15100000000001,72.191,65.247,10001.0,66.15100000000001,72.191,65.247,10001.0,66.152,72.28699999999999,65.183,10001.0,65.90899999999999,68.927,65.18400000000001,10001.0,68.20400000000001,71.77600000000001,67.29499999999999
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_1140059710759509553_kernel,1001.0,20.097,22.431,19.52,1001.0,19.518,25.92,18.911,10001.0,19.822,25.695,18.911,10001.0,19.822,25.695,18.911,10001.0,19.832,25.471,18.976,10001.0,19.783,22.944,18.976,10001.0,22.44,25.951999999999998,21.215999999999998
output_gpu,Efficientnet,Fused_Mul_fusion_6218008785421748095_kernel,1001.0,1.43,4.0,1.4069999999999998,1001.0,1.4380000000000002,2.4,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.032,1.407,10001.0,1.429,3.232,1.407,10001.0,3.282000,3.744000,3.200000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_17166085221301558149_kernel,1001.0,46.319,51.551,45.43899999999999,1001.0,43.303,49.567,42.527,10001.0,43.121,49.055,42.463,10001.0,43.121,49.055,42.463,10001.0,43.109,48.831,42.431,10001.0,43.07,46.367,42.399,10001.0,45.135,48.768,44.448
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_3498095854469570274_kernel,1001.0,80.828,86.559,80.223,1001.0,81.625,874.166,80.223,10001.0,80.452,86.68599999999998,79.486,10001.0,80.452,86.68599999999998,79.486,10001.0,80.292,85.91799999999998,79.48700000000002,10001.0,80.226,774.168,79.48700000000001,10001.0,82.87,86.23899999999999,82.079
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_2843991197081095484_kernel,1001.0,1.225,2.9760000000000004,1.1840000000000002,1001.0,1.224,1.568,1.1840000000000002,10001.0,1.224,7.392,1.183,10001.0,1.224,7.392,1.183,10001.0,1.224,3.072,1.183,10001.0,1.224,1.536,1.183,10001.0,3.088000,3.807000,3.007000
output_gpu,Efficientnet,Fused_Mul_fusion_7136422499865396003_kernel,1001.0,33.548,38.72,32.832,1001.0,31.43,38.687,30.752,10001.0,31.227,38.016,30.432,10001.0,31.227,38.016,30.432,10001.0,31.365,37.536,30.463,10001.0,31.276,36.608,30.463,10001.0,33.785999999999994,37.791,32.512
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_4417734995010026604_kernel,1001.0,1.474,4.32,1.44,1001.0,1.4780000000000002,4.928,1.44,10001.0,1.472,4.512,1.439,10001.0,1.472,4.512,1.439,10001.0,1.471,4.384,1.439,10001.0,1.472,1.824,1.439,10001.0,3.492000,4.064000,3.423000
output_gpu,Efficientnet,Fused_Mul_fusion_2452436444946535679_kernel,1001.0,24.282,28.991,23.775,1001.0,22.327,28.895,21.823,10001.0,22.269,28.671,21.695,10001.0,22.269,28.671,21.695,10001.0,22.28700000000001,28.416,21.536,10001.0,22.015,26.431,21.44,10001.0,24.447,28.543,23.744
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_14987569427484494766_kernel,1001.0,1.284,4.032,1.247,1001.0,1.2819999999999998,1.664,1.247,10001.0,1.2819999999999998,4.672,1.247,10001.0,1.2819999999999998,4.672,1.247,10001.0,1.282,4.128,1.247,10001.0,1.167,9.792,1.151,10001.0,3.266000,3.808000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_4806623134462524188_kernel,1001.0,1.3119999999999998,4.032,1.279,1001.0,1.32,4.288,1.279,10001.0,1.3119999999999998,4.352,1.279,10001.0,1.3119999999999998,4.352,1.279,10001.0,1.31,4.128,1.279,10001.0,1.311,1.664,1.279,10001.0,3.265000,3.840000,3.199000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_4847394849092105168_kernel,1001.0,1.429,3.968,1.4069999999999998,1001.0,1.429,1.888,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.031,1.407,10001.0,1.428,1.664,1.407,10001.0,3.047000,3.456000,2.976000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_8195749184902452694_kernel,1001.0,1.054,3.647,1.023,1001.0,1.064,9.6,1.023,10001.0,1.055,4.16,1.023,10001.0,1.055,4.16,1.023,10001.0,1.055,3.712,1.023,10001.0,1.054,1.408,1.023,10001.0,2.995000,3.456000,2.912000
output_gpu,Efficientnet,Fused_Mul_fusion_11586879053041252169_kernel,1001.0,1.389,3.264,1.344,1001.0,1.392,3.264,1.375,10001.0,1.388,3.36,1.344,10001.0,1.388,3.36,1.344,10001.0,1.388,3.328,1.344,10001.0,1.194,1.568,1.152,10001.0,3.030000,3.648000,2.975000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_16630542231493221412_kernel,1001.0,33.122,38.816,32.544000000000004,1001.0,31.899,702.5830000000001,30.623,10001.0,32.008,37.887,31.199,10001.0,32.008,37.887,31.199,10001.0,32.059,37.791,31.296,10001.0,31.904000000000003,35.36,31.136,10001.0,34.006,37.504000000000005,33.279
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_4812934510509418830_kernel,1001.0,1.225,2.9760000000000004,1.1840000000000002,1001.0,1.223,1.536,1.1840000000000002,10001.0,1.224,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.227,10.528,1.183,10001.0,3.084000,3.712000,3.007000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_5715594726748242336_kernel,1001.0,1.225,3.008,1.1840000000000002,1001.0,1.223,1.504,1.1840000000000002,10001.0,1.225,9.535,1.183,10001.0,1.225,9.535,1.183,10001.0,1.224,3.04,1.183,10001.0,1.223,1.536,1.183,10001.0,3.081000,3.712000,3.007000
output_gpu,Efficientnet,Fused_Mul_fusion_5428777890876628475_kernel,1001.0,1.4340000000000002,4.0,1.4069999999999998,1001.0,1.4340000000000002,3.968,1.4069999999999998,10001.0,1.433,4.32,1.4069999999999998,10001.0,1.433,4.32,1.4069999999999998,10001.0,1.432,4.096,1.407,10001.0,1.432,1.664,1.407,10001.0,3.057000,3.552000,3.007000
output_gpu,Efficientnet,Fused_Add_fusion_9073542556908647050_kernel,1001.0,19.578,25.984,19.072,1001.0,19.565,25.951,19.072,10001.0,19.56,25.983,18.88000000000001,10001.0,19.56,25.983,18.88000000000001,10001.0,19.58,25.279,18.879,10001.0,19.089,22.271,18.496,10001.0,21.822000000000003,26.016000000000002,20.927999999999997
output_gpu,Efficientnet,Fused_ReduceSum_split_13799066988795076930_kernel,1001.0,5.022,8.064,4.9910000000000005,1001.0,5.017,5.664,4.96,10001.0,2.509,5.599,2.464,10001.0,2.509,5.599,2.464,10001.0,1.82,4.704,1.791,10001.0,1.822,2.304,1.791,10001.0,3.863000,4.512000,3.776000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_17247126549616238103_kernel,1001.0,1.246,3.968,1.215,1001.0,1.245,3.936,1.215,10001.0,1.243,4.544,1.215,10001.0,1.243,4.544,1.215,10001.0,1.243,3.968,1.215,10001.0,1.242,1.6,1.215,10001.0,3.124000,3.648000,3.071000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_6079993840901460119_kernel,1001.0,447.149,460.474,440.923,1001.0,414.983,431.418,410.298,10001.0,413.02,432.599,409.816,10001.0,413.02,432.599,409.816,10001.0,412.9980000000001,431.769,409.945,10001.0,412.586,1064.1799999999998,409.723,10001.0,427.388,430.81199999999995,419.772
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_5154262782599102589_kernel,1001.0,1.331,4.192,1.311,1001.0,1.331,4.16,1.311,10001.0,1.328,4.383,1.311,10001.0,1.328,4.383,1.311,10001.0,1.328,4.224,1.311,10001.0,1.328,1.728,1.311,10001.0,3.341000,3.904000,3.295000
output_gpu,Efficientnet,Fused_Mul_fusion_801685386702961512_kernel,1001.0,111.172,117.791,110.142,1001.0,100.68,104.446,100.095,10001.0,100.405,106.526,99.806,10001.0,100.405,106.526,99.806,10001.0,100.395,106.43,99.806,10001.0,94.971,784.535,94.015,10001.0,102.92,106.39899999999999,101.983
output_gpu,Efficientnet,Fused_ReduceSum_split_9675091310831314754_kernel,1001.0,262.712,273.436,261.821,1001.0,262.704,273.597,261.757,10001.0,261.334,273.819,260.763,10001.0,261.334,273.819,260.763,10001.0,261.325,273.17900000000003,260.795,10001.0,261.476,967.381,260.797,10001.0,263.96500000000003,273.40500000000003,263.005000
output_gpu,Efficientnet,Fused_Mul_fusion_11620172359481571392_kernel,1001.0,1.4069999999999998,4.0,1.375,1001.0,1.4069999999999998,3.968,1.375,10001.0,1.405,4.32,1.375,10001.0,1.405,4.32,1.375,10001.0,1.404,4.032,1.375,10001.0,1.404,1.632,1.375,10001.0,3.273000,3.809000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_9144066609792485434_kernel,1001.0,1.243,3.968,1.215,1001.0,1.242,4.0,1.215,10001.0,1.242,4.32,1.215,10001.0,1.242,4.32,1.215,10001.0,1.242,4.064,1.215,10001.0,1.241,4.288,1.215,10001.0,3.189000,3.744000,3.135000
output_gpu,Efficientnet,Fused_Mul_fusion_9635090408711755453_kernel,1001.0,1.245,12.928,1.215,1001.0,1.232,3.936,1.215,10001.0,1.231,4.288,1.215,10001.0,1.231,4.288,1.215,10001.0,1.232,3.999,1.215,10001.0,1.059,1.376,1.023,10001.0,2.954000,3.489000,2.911000
output_gpu,Efficientnet,Fused_Mul_ReduceSum_split_2279840394601915542_kernel,1001.0,520.474,531.545,519.641,1001.0,521.869,1225.233,519.5459999999999,10001.0,518.8620000000002,531.509,518.198,10001.0,518.8620000000002,531.509,518.198,10001.0,518.861,531.83,518.167,10001.0,519.072,1307.249,518.138,10001.0,521.204,530.715,520.507000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1833783913605493022_kernel,1001.0,1.278,3.968,1.248,1001.0,1.279,1.7919999999999998,1.247,10001.0,1.276,4.512,1.247,10001.0,1.276,4.512,1.247,10001.0,1.278,4.064,1.247,10001.0,1.276,1.632,1.247,10001.0,3.158000,3.743000,3.103000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_11891238887330808906_kernel,1001.0,129.866,135.74200000000002,128.92600000000002,1001.0,134.313,140.35,132.894,10001.0,134.632,140.637,132.605,10001.0,134.632,140.637,132.605,10001.0,134.684,140.35,132.733,10001.0,134.712,833.878,132.67100000000002,10001.0,136.33200000000002,140.382,134.367
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_15353168881583858365_kernel,1001.0,654.337,675.096,650.231,1001.0,627.276,1525.612,614.9680000000001,10001.0,618.195,647.54,609.0759999999998,10001.0,618.195,647.54,609.0759999999998,10001.0,618.201,646.004,608.981,10001.0,618.301,1360.432,608.793,10001.0,635.753,639.3219999999999,632.4739999999999
output_gpu,Efficientnet,Fused_Mul_fusion_14617519833702014655_kernel,1001.0,1.43,3.967,1.4069999999999998,1001.0,1.431,4.064,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.064,1.407,10001.0,1.428,1.728,1.407,10001.0,3.289000,3.744000,3.231000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_15671864405722822694_kernel,1001.0,1.43,3.968,1.4069999999999998,1001.0,1.4269999999999998,1.696,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.064,1.407,10001.0,1.429,8.96,1.407,10001.0,3.050000,3.456000,3.007000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_17323616141832060289_kernel,1001.0,1.3969999999999998,9.759,1.344,1001.0,1.379,1.7280000000000002,1.3430000000000002,10001.0,1.38,4.512,1.3430000000000002,10001.0,1.38,4.512,1.3430000000000002,10001.0,1.379,4.16,1.343,10001.0,1.185,1.632,1.151,10001.0,3.121000,3.680000,3.071000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_14469970598104271457_kernel,1001.0,1.242,4.255,1.215,1001.0,1.352,2.752,1.215,10001.0,1.239,4.288,1.215,10001.0,1.239,4.288,1.215,10001.0,1.239,4.0,1.215,10001.0,1.127,1.472,1.119,10001.0,2.967000,3.616000,2.911000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_2457761176284161601_kernel,1001.0,1.431,3.968,1.4069999999999998,1001.0,1.4269999999999998,1.7280000000000002,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.032,1.407,10001.0,1.427,1.664,1.407,10001.0,3.051000,3.488000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1392880622459342038_kernel,1001.0,1.245,3.936,1.215,1001.0,1.248,5.312,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,4.032,1.215,10001.0,1.243,1.6,1.215,10001.0,3.180000,3.744000,3.135000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_15841842021171634546_kernel,1001.0,1.056,3.872,1.024,1001.0,1.061,3.968,1.024,10001.0,1.054,3.968,1.023,10001.0,1.054,3.968,1.023,10001.0,1.056,3.712,1.023,10001.0,1.053,1.568,1.023,10001.0,3.013000,3.585000,2.943000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_14848397733028510781_kernel,1001.0,276.033,283.516,273.244,1001.0,248.482,255.197,247.069,10001.0,248.142,255.291,246.843,10001.0,248.142,255.291,246.843,10001.0,248.07100000000003,256.315,246.844,10001.0,248.078,929.526,246.877,10001.0,250.80700000000002,255.006,249.694
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1450306126791352014_kernel,1001.0,1.526,4.256,1.503,1001.0,1.567,3.584,1.503,10001.0,1.524,8.736,1.503,10001.0,1.524,8.736,1.503,10001.0,1.523,4.352,1.503,10001.0,1.393,14.112,1.344,10001.0,3.545000,4.128000,3.487000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8205987125984955116_kernel,1001.0,1.294,4.032,1.248,1001.0,1.296,1.824,1.248,10001.0,1.2919999999999998,4.352,1.247,10001.0,1.2919999999999998,4.352,1.247,10001.0,1.292,4.256,1.247,10001.0,1.292,1.664,1.247,10001.0,3.256000,3.808000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1645072446032296459_kernel,1001.0,1.245,3.968,1.215,1001.0,1.243,1.6,1.215,10001.0,1.243,4.255,1.215,10001.0,1.243,4.255,1.215,10001.0,1.243,4.0,1.215,10001.0,1.131,1.472,1.119,10001.0,3.120000,3.649000,3.071000
output_gpu,Efficientnet,Fused_ReduceSum_split_13190158174088614872_kernel,1001.0,2.198,5.6,2.175,1001.0,2.194,3.072,2.175,10001.0,2.03,5.312,2.015,10001.0,2.03,5.312,2.015,10001.0,1.346,4.448,1.311,10001.0,1.345,1.696,1.311,10001.0,3.288000,3.936000,3.231000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_4171484740663342528_kernel,1001.0,1.279,4.064,1.247,1001.0,1.293,2.112,1.247,10001.0,1.276,4.352,1.247,10001.0,1.276,4.352,1.247,10001.0,1.276,4.224,1.247,10001.0,1.278,9.28,1.247,10001.0,3.247000,3.872000,3.199000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_12072049412550954200_kernel,1001.0,217.431,224.445,214.525,1001.0,188.775,195.87,184.35,10001.0,183.927,195.612,181.692,10001.0,183.927,195.612,181.692,10001.0,184.3,194.877,181.629,10001.0,183.546,948.342,181.79,10001.0,190.489,193.246,188.57399999999998
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_5272041617069288790_kernel,1001.0,1.055,3.648,1.023,1001.0,1.057,1.344,1.023,10001.0,1.0590000000000002,9.024,1.023,10001.0,1.0590000000000002,9.024,1.023,10001.0,1.057,3.712,1.023,10001.0,1.057,1.44,1.023,10001.0,2.995000,3.424000,2.943000
output_gpu,Efficientnet,Fused_Mul_ReduceSum_split_6803180149090166107_kernel,1001.0,348.07300000000004,358.172,347.196,1001.0,349.60900000000004,1326.767,347.06699999999995,10001.0,347.091,359.161,346.297,10001.0,347.091,359.161,346.297,10001.0,347.105,360.249,346.361,10001.0,347.24,1089.556,346.396,10001.0,349.452,359.09999999999997,348.221000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8442538944795166530_kernel,1001.0,1.275,3.936,1.247,1001.0,1.273,1.632,1.247,10001.0,1.273,4.448,1.247,10001.0,1.273,4.448,1.247,10001.0,1.273,4.16,1.247,10001.0,1.273,1.632,1.247,10001.0,3.233000,3.776000,3.167000
output_gpu,Efficientnet,Fused_Sub_split_6663685354159793162_kernel,1001.0,1.279,3.968,1.247,1001.0,1.28,4.0,1.247,10001.0,1.278,4.288,1.247,10001.0,1.278,4.288,1.247,10001.0,1.278,4.192,1.247,10001.0,1.277,6.4,1.247,10001.0,3.245000,3.807000,3.168000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_7597173094415703201_kernel,1001.0,155.491,161.15,154.046,1001.0,131.595,137.886,130.686,10001.0,130.71200000000002,137.11700000000002,129.59699999999998,10001.0,130.71200000000002,137.11700000000002,129.59699999999998,10001.0,130.685,137.15,129.59799999999998,10001.0,130.606,134.655,129.59799999999998,10001.0,133.429,137.214,132.351
output_gpu,Efficientnet,Fused_Add_fusion_15749877382061462374_kernel,1001.0,31.595,36.927,30.911,1001.0,31.574,35.103,30.912,10001.0,31.502,37.696,30.751,10001.0,31.502,37.696,30.751,10001.0,31.556,36.927,30.783,10001.0,30.369,34.367000000000004,29.759,10001.0,33.688,36.704,32.832
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_5910599757864652186_kernel,1001.0,1.409,3.36,1.375,1001.0,1.408,1.791,1.376,10001.0,1.408,3.36,1.375,10001.0,1.408,3.36,1.375,10001.0,1.408,3.36,1.375,10001.0,1.424,4.32,1.375,10001.0,3.248000,3.840000,3.199000
output_gpu,Efficientnet,Fused_Mul_ReduceSum_split_6886769834019553517_kernel,1001.0,463.81,477.658,462.682,1001.0,465.817,1212.432,462.682,10001.0,461.168,473.847,460.407,10001.0,461.168,473.847,460.407,10001.0,461.177,473.048,460.472,10001.0,461.436,1182.994,460.506,10001.0,463.501,473.27599999999995,462.268000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_13910028728076263536_kernel,1001.0,652.02,666.712,647.352,1001.0,583.6419999999999,1291.088,581.0169999999999,10001.0,582.418,589.7170000000001,580.8530000000001,10001.0,582.418,589.7170000000001,580.8530000000001,10001.0,582.433,590.39,580.981,10001.0,582.688,1291.185,580.922,10001.0,584.819,590.043,583.483
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_6225299096689376425_kernel,1001.0,1.405,3.264,1.376,1001.0,1.409,2.016,1.375,10001.0,1.404,3.392,1.375,10001.0,1.404,3.392,1.375,10001.0,1.404,3.328,1.375,10001.0,1.404,1.792,1.375,10001.0,3.258000,4.064000,3.199000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_6454105740187182362_kernel,1001.0,1.43,3.968,1.4069999999999998,1001.0,1.429,2.144,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.427,4.064,1.407,10001.0,1.428,1.664,1.407,10001.0,3.272000,3.712000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_18230314276017874989_kernel,1001.0,2.476,5.6,2.4,1001.0,2.485,5.568,2.399,10001.0,2.471,5.92,2.367,10001.0,2.471,5.92,2.367,10001.0,2.471,5.568,2.367,10001.0,2.309,2.976,2.271,10001.0,4.550000,5.280000,4.415000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_3395790944426933839_kernel,1001.0,1.928,4.704,1.888,1001.0,1.922,2.336,1.887,10001.0,1.919,5.056,1.887,10001.0,1.919,5.056,1.887,10001.0,1.919,4.768,1.887,10001.0,1.65,2.048,1.6,10001.0,3.670000,4.256000,3.615000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_6660525506331650626_kernel,1001.0,33.125,38.336,32.48,1001.0,31.225,34.399,30.591,10001.0,32.035000000000004,38.431,31.263,10001.0,32.035000000000004,38.431,31.263,10001.0,32.062,37.792,31.359,10001.0,32.025,38.4,31.263,10001.0,34.178,38.623,33.055
output_gpu,Efficientnet,Fused_Mul_fusion_1814995894034196293_kernel,1001.0,48.108,53.375,47.487,1001.0,44.083,50.239,43.392,10001.0,44.04600000000001,50.271,43.231,10001.0,44.04600000000001,50.271,43.231,10001.0,44.037,50.815,43.327,10001.0,41.766,45.791,40.927,10001.0,46.379,49.855,45.088
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_6442127154021233630_kernel,1001.0,1.849,4.672,1.823,1001.0,1.847,2.432,1.823,10001.0,1.843,5.024,1.791,10001.0,1.843,5.024,1.791,10001.0,1.845,4.8,1.823,10001.0,1.845,2.304,1.823,10001.0,3.905000,4.576000,3.839000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_16536564857464521794_kernel,1001.0,1.285,4.032,1.248,1001.0,1.2819999999999998,1.696,1.248,10001.0,1.2819999999999998,4.383,1.247,10001.0,1.2819999999999998,4.383,1.247,10001.0,1.282,4.096,1.247,10001.0,1.282,1.696,1.247,10001.0,3.197000,3.777000,3.135000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_6710532957262761011_kernel,1001.0,1.4380000000000002,4.0,1.4069999999999998,1001.0,1.4369999999999998,3.36,1.4069999999999998,10001.0,1.436,4.32,1.4069999999999998,10001.0,1.436,4.32,1.4069999999999998,10001.0,1.436,4.064,1.407,10001.0,1.436,4.0,1.407,10001.0,3.276000,3.743000,3.200000
output_gpu,Efficientnet,Fused_Mul_split_5258977607093574711_kernel,1001.0,1.2990000000000002,4.032,1.279,1001.0,1.4,4.352,1.279,10001.0,1.297,4.384,1.279,10001.0,1.297,4.384,1.279,10001.0,1.297,9.088,1.279,10001.0,1.296,1.664,1.279,10001.0,3.312000,3.872000,3.232000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_7506054908339298865_kernel,1001.0,65.939,71.616,65.215,1001.0,68.082,74.88,67.23100000000001,10001.0,68.095,74.559,67.135,10001.0,68.095,74.559,67.135,10001.0,68.12899999999999,74.015,67.135,10001.0,68.096,74.911,67.104,10001.0,70.235,74.46300000000001,69.088
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_11074468992660818505_kernel,1001.0,1.25,3.936,1.215,1001.0,1.249,1.696,1.215,10001.0,1.249,4.288,1.215,10001.0,1.249,4.288,1.215,10001.0,1.249,4.032,1.215,10001.0,1.266,4.96,1.215,10001.0,3.178000,3.712000,3.103000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.183,1001.0,1.224,2.9760000000000004,1.1840000000000002,10001.0,1.223,3.04,1.183,10001.0,1.223,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.208,1.536,1.183,10001.0,3.062000,3.552000,3.007000
output_gpu,Efficientnet,Fused_Mul_fusion_9888175981854340919_kernel,1001.0,1.233,4.512,1.215,1001.0,1.242,6.496,1.215,10001.0,1.23,4.224,1.215,10001.0,1.23,4.224,1.215,10001.0,1.231,3.968,1.215,10001.0,1.229,1.823,1.215,10001.0,2.955000,3.776000,2.880000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_14315780451896088896_kernel,1001.0,1.224,2.944,1.1840000000000002,1001.0,1.227,3.008,1.1840000000000002,10001.0,1.224,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.224,3.104,1.183,10001.0,1.08,1.472,1.055,10001.0,3.074000,3.744000,3.007000
output_gpu,Efficientnet,Fused_ReduceSum_split_2064422502572383303_kernel,1001.0,2.3,5.28,2.271,1001.0,2.3,5.312,2.271,10001.0,1.957,4.896,1.92,10001.0,1.957,4.896,1.92,10001.0,1.324,4.096,1.311,10001.0,1.324,1.664,1.311,10001.0,3.251000,3.840000,3.199000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_5430153301064305438_kernel,1001.0,1.225,2.9760000000000004,1.183,1001.0,1.224,1.536,1.1840000000000002,10001.0,1.225,3.04,1.183,10001.0,1.225,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.223,1.536,1.183,10001.0,3.089000,3.743000,3.007000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_4261862423109646487_kernel,1001.0,1.061,3.808,1.024,1001.0,1.062,1.472,1.024,10001.0,1.062,4.032,1.023,10001.0,1.062,4.032,1.023,10001.0,1.063,3.84,1.023,10001.0,1.062,1.344,1.024,10001.0,3.011000,3.744000,2.943000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_13094596438425763604_kernel,1001.0,1.056,3.616,1.023,1001.0,1.062,3.776,1.023,10001.0,1.058,4.192,1.023,10001.0,1.058,4.192,1.023,10001.0,1.06,3.872,1.023,10001.0,0.938,1.344,0.895,10001.0,2.995000,3.424000,2.912000
output_gpu,Efficientnet,Fused_Mul_fusion_16289435274964764525_kernel,1001.0,14.875,18.655,14.399,1001.0,13.938,20.48,13.408,10001.0,13.933,20.64,13.376,10001.0,13.933,20.64,13.376,10001.0,13.879,20.32,13.215,10001.0,13.876,18.24,13.216,10001.0,16.485000,20.031000,15.712000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_11848777849059956305_kernel,1001.0,1.24,3.904,1.215,1001.0,1.241,4.288,1.215,10001.0,1.238,4.064,1.215,10001.0,1.238,4.064,1.215,10001.0,1.238,3.808,1.215,10001.0,1.092,1.536,1.055,10001.0,3.103000,4.800000,3.039000
output_gpu,Efficientnet,Fused_Add_fusion_4884374332861798850_kernel,1001.0,43.279,48.191,42.559,1001.0,43.273,46.528,42.655,10001.0,43.2,49.183,42.399,10001.0,43.2,49.183,42.399,10001.0,43.203,48.767,42.335,10001.0,41.809,740.056,41.151,10001.0,45.565000000000005,48.704,44.574999999999996
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_18040555757422770515_kernel,1001.0,1.409,3.392,1.375,1001.0,1.408,1.952,1.375,10001.0,1.408,3.488,1.375,10001.0,1.408,3.488,1.375,10001.0,1.407,3.456,1.375,10001.0,1.407,1.92,1.375,10001.0,3.264000,4.096000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_6273038938166943728_kernel,1001.0,3.644,6.944,3.552,1001.0,3.652,4.448,3.552,10001.0,3.636,7.2,3.52,10001.0,3.636,7.2,3.52,10001.0,3.639,6.975,3.551,10001.0,2.851,3.744,2.751,10001.0,5.884000,6.784000,5.759000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1132711365602941589_kernel,1001.0,1.6769999999999998,4.448,1.632,1001.0,1.6769999999999998,4.511,1.632,10001.0,1.673,4.768,1.631,10001.0,1.673,4.768,1.631,10001.0,1.674,4.576,1.631,10001.0,1.675,4.992,1.632,10001.0,3.680000,4.288000,3.615000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_6727022908922420278_kernel,1001.0,4.93,8.16,4.895,1001.0,4.931,9.024,4.895,10001.0,14.291,17.824,13.983,10001.0,14.291,17.824,13.983,10001.0,4.335,7.232,4.287,10001.0,4.349,16.32,4.287,10001.0,5.943000,6.464000,5.887000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_5536299951513084258_kernel,1001.0,1.266,4.0,1.247,1001.0,1.267,4.735,1.247,10001.0,1.262,4.512,1.247,10001.0,1.262,4.512,1.247,10001.0,1.262,4.032,1.247,10001.0,1.263,3.36,1.247,10001.0,3.219000,3.808000,3.167000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_3646968818818513730_kernel,1001.0,1.262,3.712,1.247,1001.0,1.259,1.44,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.26,3.808,1.247,10001.0,1.289,2.528,1.247,10001.0,3.095000,3.552000,3.039000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_6112055824891317325_kernel,1001.0,192.488,198.237,190.141,1001.0,160.237,165.59799999999998,155.326,10001.0,155.40599999999995,165.981,153.085,10001.0,155.40599999999995,165.981,153.085,10001.0,155.32199999999995,166.30100000000004,153.053,10001.0,154.585,862.8059999999999,153.18200000000002,10001.0,160.81799999999998,164.446,159.071
output_gpu,Efficientnet,Fused_ReduceSum_split_16545097466616768443_kernel,1001.0,2.1630000000000003,5.312,2.143,1001.0,2.164,5.312,2.143,10001.0,2.0,4.96,1.983,10001.0,2.0,4.96,1.983,10001.0,1.309,4.064,1.279,10001.0,1.323,1.92,1.279,10001.0,3.277000,3.776000,3.200000
output_gpu,Efficientnet,Fused_Add_fusion_4937376095707588825_kernel,1001.0,142.11,147.07,141.375,1001.0,142.105,145.63,141.279,10001.0,142.17,147.517,141.245,10001.0,142.17,147.517,141.245,10001.0,142.16400000000004,146.94099999999997,141.245,10001.0,137.86100000000002,884.3100000000001,137.086,10001.0,144.011,147.74300000000002,143.103
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_13658837027066979073_kernel,1001.0,2.478,5.5360000000000005,2.399,1001.0,2.479,3.3280000000000003,2.3680000000000003,10001.0,2.469,5.92,2.3680000000000003,10001.0,2.469,5.92,2.3680000000000003,10001.0,2.47,5.568,2.368,10001.0,2.31,3.008,2.271,10001.0,4.647000,5.376000,4.511000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_14020345364147179976_kernel,1001.0,1.295,4.16,1.248,1001.0,1.297,4.159,1.248,10001.0,1.2919999999999998,4.384,1.247,10001.0,1.2919999999999998,4.384,1.247,10001.0,1.292,4.256,1.247,10001.0,1.293,2.112,1.247,10001.0,3.245000,3.808000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1321779060138061667_kernel,1001.0,1.268,3.968,1.247,1001.0,1.267,1.664,1.247,10001.0,1.266,4.32,1.247,10001.0,1.266,4.32,1.247,10001.0,1.265,4.032,1.247,10001.0,1.265,1.632,1.247,10001.0,3.235000,3.776000,3.167000
output_gpu,Efficientnet,Fused_Mul_split_4963195551358689714_kernel,1001.0,1.281,4.128,1.247,1001.0,1.281,4.192,1.247,10001.0,1.278,4.352,1.247,10001.0,1.278,4.352,1.247,10001.0,1.277,4.032,1.247,10001.0,1.278,1.632,1.247,10001.0,3.244000,3.776000,3.168000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_8041930029223397743_kernel,1001.0,1.375,4.096,1.3430000000000002,1001.0,1.374,1.888,1.3430000000000002,10001.0,1.374,4.544,1.3430000000000002,10001.0,1.374,4.544,1.3430000000000002,10001.0,1.373,4.32,1.343,10001.0,1.375,9.44,1.343,10001.0,3.397000,3.935000,3.327000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_7953857284013686674_kernel,1001.0,1.679,4.448,1.632,1001.0,1.675,2.08,1.632,10001.0,1.673,4.8,1.631,10001.0,1.673,4.8,1.631,10001.0,1.674,5.12,1.631,10001.0,1.675,2.08,1.632,10001.0,3.684000,4.288000,3.616000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_18235002013270691364_kernel,1001.0,1.224,3.008,1.183,1001.0,1.224,1.6,1.1840000000000002,10001.0,1.223,3.104,1.183,10001.0,1.223,3.104,1.183,10001.0,1.224,3.04,1.183,10001.0,1.236,9.28,1.183,10001.0,3.070000,3.776000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_3572185621250037198_kernel,1001.0,1.4069999999999998,3.392,1.376,1001.0,1.406,1.984,1.375,10001.0,1.406,3.4560000000000004,1.375,10001.0,1.406,3.4560000000000004,1.375,10001.0,1.406,3.488,1.375,10001.0,1.406,8.96,1.375,10001.0,3.255000,4.000000,3.199000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_16917353644544047116_kernel,1001.0,1.43,4.0,1.4069999999999998,1001.0,1.4280000000000002,1.664,1.4069999999999998,10001.0,1.4269999999999998,4.32,1.4069999999999998,10001.0,1.4269999999999998,4.32,1.4069999999999998,10001.0,1.428,4.064,1.407,10001.0,1.429,9.664,1.407,10001.0,3.052000,4.320000,3.007000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_11179954567090841650_kernel,1001.0,1.43,3.968,1.4069999999999998,1001.0,1.4280000000000002,1.664,1.4069999999999998,10001.0,1.4280000000000002,4.352,1.4069999999999998,10001.0,1.4280000000000002,4.352,1.4069999999999998,10001.0,1.428,8.416,1.407,10001.0,1.428,3.936,1.407,10001.0,3.058000,3.584000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_9473904397230948095_kernel,1001.0,1.296,4.16,1.248,1001.0,1.2990000000000002,2.048,1.248,10001.0,1.2919999999999998,4.416,1.247,10001.0,1.2919999999999998,4.416,1.247,10001.0,1.292,4.096,1.247,10001.0,1.292,1.792,1.247,10001.0,3.253000,3.936000,3.168000
output_gpu,Efficientnet,Fused_Add_fusion_11859580883021945139_kernel,1001.0,60.377,65.759,59.456,1001.0,60.381,62.88,59.583,10001.0,60.223,65.69500000000001,59.327,10001.0,60.223,65.69500000000001,59.327,10001.0,60.235,66.65400000000001,59.359,10001.0,58.655,62.303,58.047,10001.0,62.398,65.44,61.503
output_gpu,Efficientnet,Fused_Mul_split_116248657496876303_kernel,1001.0,1.227,3.936,1.1840000000000002,1001.0,1.233,4.0,1.1840000000000002,10001.0,1.226,4.224,1.1840000000000002,10001.0,1.226,4.224,1.1840000000000002,10001.0,1.225,4.064,1.183,10001.0,1.225,1.728,1.184,10001.0,3.198000,3.712000,3.135000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_17262428168388574704_kernel,1001.0,1.406,3.392,1.375,1001.0,1.415,3.392,1.375,10001.0,1.404,3.488,1.375,10001.0,1.404,3.488,1.375,10001.0,1.404,3.488,1.375,10001.0,1.405,7.328,1.375,10001.0,3.259000,3.968000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_11308781628985016361_kernel,1001.0,1.237,3.968,1.215,1001.0,1.235,1.6,1.215,10001.0,1.235,4.288,1.215,10001.0,1.235,4.288,1.215,10001.0,1.236,4.0,1.215,10001.0,1.235,1.696,1.215,10001.0,3.174000,3.745000,3.103000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_3114748668713672597_kernel,1001.0,1.429,3.968,1.4069999999999998,1001.0,1.43,4.256,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.4280000000000002,4.32,1.4069999999999998,10001.0,1.428,4.064,1.407,10001.0,1.429,9.216,1.407,10001.0,3.050000,3.456000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_7735534824565904205_kernel,1001.0,1.285,4.032,1.247,1001.0,1.285,4.064,1.247,10001.0,1.2819999999999998,4.48,1.247,10001.0,1.2819999999999998,4.48,1.247,10001.0,1.282,4.16,1.247,10001.0,1.284,8.032,1.247,10001.0,3.264000,3.840000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_4809881116960841411_kernel,1001.0,1.406,3.392,1.375,1001.0,1.4069999999999998,1.952,1.375,10001.0,1.404,3.4560000000000004,1.375,10001.0,1.404,3.4560000000000004,1.375,10001.0,1.404,3.456,1.375,10001.0,1.404,1.92,1.375,10001.0,3.254000,4.065000,3.199000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_5782460969217780077_kernel,1001.0,1.246,3.936,1.215,1001.0,1.243,1.6,1.216,10001.0,1.243,4.256,1.215,10001.0,1.243,4.256,1.215,10001.0,1.242,4.0,1.215,10001.0,1.242,1.6,1.215,10001.0,3.188000,3.840000,3.135000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_16315866732800963098_kernel,1001.0,20.108,24.576,19.487,1001.0,19.489,23.04,18.912,10001.0,19.837,25.792,19.007,10001.0,19.837,25.792,19.007,10001.0,19.825,25.504,18.976,10001.0,19.778,23.68,18.911,10001.0,21.918000,25.375000,21.088000
output_gpu,Efficientnet,Fused_ReduceSum_split_7498848608164351722_kernel,1001.0,237.103,248.829,234.909,1001.0,236.99400000000003,1096.786,234.526,10001.0,232.423,244.86,231.804,10001.0,232.423,244.86,231.804,10001.0,232.409,244.571,231.804,10001.0,232.645,1018.42,231.837,10001.0,234.706,244.606,233.758000
output_gpu,Efficientnet,Fused_Mul_fusion_6479548354026405088_kernel,1001.0,1.388,3.232,1.344,1001.0,1.388,1.7919999999999998,1.344,10001.0,1.386,1.7919999999999998,1.3430000000000002,10001.0,1.386,1.7919999999999998,1.3430000000000002,10001.0,1.386,3.296,1.343,10001.0,1.386,1.792,1.344,10001.0,3.260000,4.160000,3.199000
output_gpu,Efficientnet,Fused_Mul_fusion_8222351837113126454_kernel,1001.0,1.433,4.0,1.4069999999999998,1001.0,1.3,1.536,1.279,10001.0,1.43,4.32,1.4069999999999998,10001.0,1.43,4.32,1.4069999999999998,10001.0,1.43,4.096,1.407,10001.0,1.227,1.472,1.215,10001.0,3.050000,3.552000,2.976000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_12213906370282907124_kernel,1001.0,185.194,193.277,181.501,1001.0,186.065,190.014,185.406,10001.0,186.167,192.892,185.148,10001.0,186.167,192.892,185.148,10001.0,186.181,192.893,185.117,10001.0,186.244,193.15,185.214,10001.0,189.131,193.278,186.398
output_gpu,Efficientnet,Fused_Sub_Exp_ReduceSum_split_15937433446338712464_kernel,1001.0,7.572999999999999,10.432,7.487,1001.0,7.57,10.464,7.519,10001.0,3.088,7.007999999999999,3.039,10001.0,3.088,7.007999999999999,3.039,10001.0,3.089,6.72,3.039,10001.0,3.102,6.976,3.04,10001.0,4.757000,5.888000,4.703000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_1692033580175145706_kernel,1001.0,270.995,279.805,266.492,1001.0,263.396,269.885,257.308,10001.0,256.492,270.04299999999995,254.267,10001.0,256.492,270.04299999999995,254.267,10001.0,257.134,272.091,254.459,10001.0,257.142,268.125,254.525,10001.0,265.688,277.18199999999996,263.00600000000003
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_8245829412028155559_kernel,1001.0,1.243,3.968,1.215,1001.0,1.245,3.936,1.215,10001.0,1.242,4.256,1.215,10001.0,1.242,4.256,1.215,10001.0,1.241,4.032,1.215,10001.0,1.241,4.255,1.215,10001.0,3.185000,3.808000,3.103000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_15940470878870481376_kernel,1001.0,4.575,7.711,4.48,1001.0,4.604,7.616,4.48,10001.0,14.261,17.951,13.919,10001.0,14.261,17.951,13.919,10001.0,4.317,7.167,4.287,10001.0,4.319,4.832,4.287,10001.0,6.357000,6.880000,6.303000
output_gpu,Efficientnet,Fused_Add_Mul_fusion_15266937697298202316_kernel,1001.0,61.308,67.903,60.639,1001.0,58.18,64.639,57.407,10001.0,58.103,65.406,57.279,10001.0,58.103,65.406,57.279,10001.0,58.111,63.998000000000005,57.183,10001.0,58.104000000000006,64.54400000000001,57.247,10001.0,61.259,64.96000000000001,60.031
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_940467421458833319_kernel,1001.0,1.525,4.32,1.503,1001.0,1.527,4.5760000000000005,1.503,10001.0,1.5219999999999998,4.736000000000001,1.503,10001.0,1.5219999999999998,4.736000000000001,1.503,10001.0,1.522,4.352,1.503,10001.0,1.522,1.92,1.503,10001.0,3.527000,4.128000,3.456000
output_gpu,Efficientnet,Fused_Mul_fusion_4807599095533490647_kernel,1001.0,1.228,3.968,1.1840000000000002,1001.0,1.231,4.0,1.1840000000000002,10001.0,1.226,4.256,1.1840000000000002,10001.0,1.226,4.256,1.1840000000000002,10001.0,1.225,4.096,1.183,10001.0,1.225,1.6,1.184,10001.0,2.960000,3.488000,2.879000
output_gpu,Efficientnet,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.426,4.736000000000001,1.4069999999999998,10001.0,1.421,3.4560000000000004,1.4069999999999998,10001.0,1.421,3.4560000000000004,1.4069999999999998,10001.0,1.421,3.488,1.407,10001.0,1.261,1.792,1.247,10001.0,3.287000,4.000000,3.200000
output_gpu,Efficientnet,Fused_LessEqual_Cast_split_633807755266141474_kernel,1001.0,1.43,3.968,1.4069999999999998,1001.0,1.4269999999999998,1.664,1.4069999999999998,10001.0,1.429,7.808,1.4069999999999998,10001.0,1.429,7.808,1.4069999999999998,10001.0,1.428,4.096,1.407,10001.0,1.428,1.792,1.407,10001.0,3.046000,3.456000,2.976000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_15677824887283063820_kernel,1001.0,3.4360000000000004,6.752000000000001,3.36,1001.0,3.435,4.096,3.36,10001.0,3.429,7.007999999999999,3.3280000000000003,10001.0,3.429,7.007999999999999,3.3280000000000003,10001.0,3.423,6.88,3.327,10001.0,2.487,3.296,2.4,10001.0,5.629000,6.432000,5.504000
output_gpu,Efficientnet,Fused_Sub_split_6275897674537880412_kernel,1001.0,1.394,4.256,1.375,1001.0,1.394,4.16,1.375,10001.0,1.391,4.448,1.375,10001.0,1.391,4.448,1.375,10001.0,1.392,8.512,1.375,10001.0,1.393,2.048,1.375,10001.0,3.416000,3.968000,3.329000
output_gpu,Efficientnet,Fused_DropoutGrad_Reshape_Tile_RealDiv_fusion_9444282056329394066_kernel,1001.0,62.841,67.87100000000001,62.207,1001.0,63.451,68.479,62.207,10001.0,59.016000000000005,66.943,56.415,10001.0,59.016000000000005,66.943,56.415,10001.0,59.736,66.815,56.383,10001.0,60.083,695.96,56.543000000000006,10001.0,65.72999999999999,70.303,63.071000000000005
output_gpu,Efficientnet,Fused_Add_Mul_fusion_14661561268987095456_kernel,1001.0,26.652,29.984,26.015,1001.0,25.778,32.063,25.024,10001.0,25.742,32.159,24.896,10001.0,25.742,32.159,24.896,10001.0,25.702,31.84,25.023,10001.0,25.785,774.072,24.96,10001.0,28.158000,32.190999999999995,27.264000
output_gpu,Efficientnet,Fused_Add_Mul_fusion_1697494057164298535_kernel,1001.0,45.955,51.711000000000006,45.248000000000005,1001.0,41.467,47.231,40.927,10001.0,41.373000000000005,47.583,40.671,10001.0,41.373000000000005,47.583,40.671,10001.0,41.406000000000006,47.52,40.703,10001.0,41.312000000000005,45.151,40.639,10001.0,43.606,47.294999999999995,42.879
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_13797654758739466660_kernel,1001.0,65.182,71.10300000000001,64.288,1001.0,62.097,65.95100000000001,60.735,10001.0,60.476000000000006,66.366,59.743,10001.0,60.476000000000006,66.366,59.743,10001.0,60.475,65.791,59.807,10001.0,60.614,825.59,59.743,10001.0,62.535,65.91900000000001,61.92
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_3573187402831086557_kernel,1001.0,1.244,3.935,1.215,1001.0,1.245,1.632,1.215,10001.0,1.243,4.448,1.215,10001.0,1.243,4.448,1.215,10001.0,1.243,4.064,1.215,10001.0,1.241,1.632,1.215,10001.0,3.191000,3.808000,3.104000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_7079638272548755599_kernel,1001.0,1.241,3.936,1.215,1001.0,1.245,1.664,1.215,10001.0,1.241,9.536,1.215,10001.0,1.241,9.536,1.215,10001.0,1.239,4.0,1.215,10001.0,1.24,9.6,1.215,10001.0,3.210000,3.744000,3.135000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_4090713959143906870_kernel,1001.0,1.062,3.872,1.023,1001.0,1.0590000000000002,1.344,1.024,10001.0,1.061,4.224,1.023,10001.0,1.061,4.224,1.023,10001.0,1.061,3.744,1.023,10001.0,1.059,1.376,1.023,10001.0,2.994000,3.488000,2.943000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_3219141659701939205_kernel,1001.0,1.661,4.448,1.631,1001.0,1.658,2.048,1.631,10001.0,1.6569999999999998,4.8,1.631,10001.0,1.6569999999999998,4.8,1.631,10001.0,1.657,4.672,1.631,10001.0,1.661,9.28,1.631,10001.0,3.680000,4.256000,3.615000
output_gpu,Efficientnet,Fused_Mul_fusion_6115235304061678813_kernel,1001.0,1.4340000000000002,4.0,1.4069999999999998,1001.0,1.433,1.696,1.4069999999999998,10001.0,1.432,4.384,1.4069999999999998,10001.0,1.432,4.384,1.4069999999999998,10001.0,1.432,4.096,1.407,10001.0,1.432,1.664,1.407,10001.0,3.046000,4.607000,3.007000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_5188609352375322588_kernel,1001.0,20.113,24.96,19.392,1001.0,19.501,23.616,18.943,10001.0,19.822,25.728,18.976,10001.0,19.822,25.728,18.976,10001.0,19.835,25.631,19.008,10001.0,19.767,23.616000000000003,18.911,10001.0,22.101000,25.472000,20.736000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_5428261959146241371_kernel,1001.0,1.271,3.968,1.247,1001.0,1.27,4.0,1.247,10001.0,1.268,4.32,1.247,10001.0,1.268,4.32,1.247,10001.0,1.268,4.063,1.247,10001.0,1.268,1.696,1.247,10001.0,3.241000,3.809000,3.168000
output_gpu,Efficientnet,Fused_Reshape_Tile_RealDiv_Mul_Add_fusion_15017782892077766494_kernel,1001.0,115.117,121.022,114.143,1001.0,82.475,87.90299999999999,81.727,10001.0,81.44999999999997,87.96700000000001,80.57400000000001,10001.0,81.44999999999997,87.96700000000001,80.57400000000001,10001.0,81.525,87.51899999999999,80.479,10001.0,81.44200000000001,972.309,80.447,10001.0,84.106,87.743,83.263
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_5479047501018117380_kernel,1001.0,1.735,4.544,1.695,1001.0,1.74,4.6080000000000005,1.696,10001.0,1.73,4.96,1.695,10001.0,1.73,4.96,1.695,10001.0,1.732,4.576,1.695,10001.0,1.733,2.208,1.695,10001.0,3.497000,4.064000,3.423000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_1703812871172476354_kernel,1001.0,1.119,3.776,1.087,1001.0,1.12,4.032,1.087,10001.0,1.12,4.384,1.087,10001.0,1.12,4.384,1.087,10001.0,1.119,3.936,1.087,10001.0,1.118,1.568,1.087,10001.0,3.077000,3.584000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_3130351152002048416_kernel,1001.0,1.241,3.935,1.215,1001.0,1.246,2.432,1.215,10001.0,1.24,4.256,1.215,10001.0,1.24,4.256,1.215,10001.0,1.239,4.096,1.215,10001.0,1.24,8.672,1.215,10001.0,3.186000,4.064000,3.135000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_1517777642279240205_kernel,1001.0,1.974,4.992,1.951,1001.0,1.693,2.112,1.663,10001.0,1.965,5.12,1.92,10001.0,1.965,5.12,1.92,10001.0,1.966,5.184,1.92,10001.0,1.692,2.112,1.663,10001.0,3.755000,4.416000,3.711000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_split_13992973118270838490_kernel,1001.0,1.245,3.936,1.215,1001.0,1.247,1.888,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,4.032,1.215,10001.0,1.243,5.952,1.215,10001.0,3.174000,3.712000,3.103000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_Reshape_Mul_split_8699107882658219152_kernel,1001.0,486.536,497.242,484.346,1001.0,390.138,1084.467,387.643,10001.0,388.816,1012.685,387.673,10001.0,388.816,1012.685,387.673,10001.0,388.761,396.537,387.641,10001.0,388.882,1114.515,387.643,10001.0,391.40200000000004,396.445,390.237
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_1435307521526436233_kernel,1001.0,20.715,26.272,20.192,1001.0,20.736,24.768,20.191,10001.0,20.706,26.815,20.0,10001.0,20.706,26.815,20.0,10001.0,20.668000000000003,26.495,20.0,10001.0,20.171000000000003,23.584,19.552,10001.0,23.121000,27.360000,22.176000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_1722693735800738113_kernel,1001.0,3.4530000000000003,6.656000000000001,3.392,1001.0,3.548,4.288,3.488,10001.0,13.541,16.863,12.959,10001.0,13.541,16.863,12.959,10001.0,4.243,7.007,4.223,10001.0,4.243,4.608,4.223,10001.0,6.388000,6.912000,6.335000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_187408248622839644_kernel,1001.0,1.224,3.04,1.183,1001.0,1.223,1.536,1.1840000000000002,10001.0,1.225,3.04,1.183,10001.0,1.225,3.04,1.183,10001.0,1.224,3.072,1.183,10001.0,1.225,9.247,1.183,10001.0,3.074000,3.712000,3.007000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_7684697635601219847_kernel,1001.0,1.272,3.968,1.247,1001.0,1.286,2.656,1.247,10001.0,1.272,4.32,1.247,10001.0,1.272,4.32,1.247,10001.0,1.27,4.032,1.247,10001.0,1.271,1.856,1.247,10001.0,3.242000,3.808000,3.167000
output_gpu,Efficientnet,Fused_Mul_fusion_9171560666797794185_kernel,1001.0,1.389,3.264,1.344,1001.0,1.394,3.2960000000000003,1.344,10001.0,1.3869999999999998,3.3280000000000003,1.344,10001.0,1.3869999999999998,3.3280000000000003,1.344,10001.0,1.387,3.328,1.343,10001.0,1.19,1.568,1.151,10001.0,3.038000,3.584000,2.975000
output_gpu,Efficientnet,Fused_Sub_split_13922706589884589283_kernel,1001.0,1.298,4.032,1.279,1001.0,1.2990000000000002,4.32,1.279,10001.0,1.296,4.352,1.279,10001.0,1.296,4.352,1.279,10001.0,1.296,4.096,1.279,10001.0,1.299,4.224,1.279,10001.0,3.082000,4.448000,3.039000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_2977706044310277629_kernel,1001.0,1.211,3.872,1.183,1001.0,1.211,1.888,1.183,10001.0,1.212,6.848,1.183,10001.0,1.212,6.848,1.183,10001.0,1.211,4.096,1.183,10001.0,1.211,9.6,1.183,10001.0,3.220000,3.743000,3.167000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_14970288867770865115_kernel,1001.0,1.294,4.032,1.247,1001.0,1.2919999999999998,1.664,1.248,10001.0,1.2919999999999998,4.512,1.247,10001.0,1.2919999999999998,4.512,1.247,10001.0,1.292,4.32,1.247,10001.0,1.175,1.6,1.151,10001.0,3.254000,3.840000,3.167000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_14791304097862478194_kernel,1001.0,1.336,4.224,1.311,1001.0,1.334,1.7280000000000002,1.311,10001.0,1.334,4.5760000000000005,1.311,10001.0,1.334,4.5760000000000005,1.311,10001.0,1.333,4.128,1.311,10001.0,1.213,1.6,1.183,10001.0,3.360000,3.967000,3.295000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_18186114662530712923_kernel,1001.0,148.436,155.07,147.422,1001.0,142.061,148.063,141.438,10001.0,143.65,149.309,142.749,10001.0,143.65,149.309,142.749,10001.0,143.64700000000002,148.765,142.813,10001.0,143.847,850.39,142.87900000000002,10001.0,146.426,149.119,145.343
output_gpu,Efficientnet,Fused_Sub_split_17403689683615116560_kernel,1001.0,1.629,4.416,1.599,1001.0,1.629,4.416,1.599,10001.0,1.6269999999999998,4.736000000000001,1.599,10001.0,1.6269999999999998,4.736000000000001,1.599,10001.0,1.626,4.639,1.599,10001.0,1.629,2.048,1.599,10001.0,3.395000,4.256000,3.327000
output_gpu,Efficientnet,Fused_Neg_Exp_Add_RealDiv_split_3765076268242095930_kernel,1001.0,1.3259999999999998,4.0,1.311,1001.0,1.3230000000000002,1.696,1.311,10001.0,1.3230000000000002,4.352,1.311,10001.0,1.3230000000000002,4.352,1.311,10001.0,1.323,4.064,1.311,10001.0,1.162,7.776,1.119,10001.0,3.079000,3.552000,3.007000
output_gpu,Efficientnet,Fused_Sub_split_439512258704561033_kernel,1001.0,1.84,4.704,1.823,1001.0,1.84,4.704,1.7919999999999998,10001.0,1.837,5.024,1.7919999999999998,10001.0,1.837,5.024,1.7919999999999998,10001.0,1.838,4.736,1.791,10001.0,1.879,2.336,1.855,10001.0,3.652000,4.416000,3.583000
output_gpu,Efficientnet,Fused_Sub_split_13313664138349900053_kernel,1001.0,1.816,4.6080000000000005,1.791,1001.0,1.815,4.704,1.791,10001.0,1.81,4.96,1.791,10001.0,1.81,4.96,1.791,10001.0,1.811,4.704,1.76,10001.0,1.812,2.272,1.791,10001.0,3.585000,7.136000,3.519000
output_gpu,Efficientnet,Fused_BroadcastTo_inplace_assign_builder_14935873129798187420_kernel,1001.0,1.266,3.712,1.247,1001.0,1.266,4.064,1.247,10001.0,1.264,4.448,1.247,10001.0,1.264,4.448,1.247,10001.0,1.264,3.776,1.247,10001.0,1.268,9.472,1.247,10001.0,3.104000,4.512000,3.039000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_2681961934642545650_kernel,1001.0,10.931,13.6,10.815,1001.0,11.093,11.455,10.944,10001.0,14.292,17.887999999999998,13.023,10001.0,14.292,17.887999999999998,13.023,10001.0,4.482,7.584,4.415,10001.0,4.484,5.344,4.415,10001.0,6.367000,7.424000,6.239000
output_gpu,Efficientnet,Fused_Mul_Add_Mul_fusion_12198158277290552354_kernel,1001.0,1.849,4.832,1.823,1001.0,1.849,2.3040000000000003,1.823,10001.0,1.845,9.279,1.823,10001.0,1.845,9.279,1.823,10001.0,1.845,4.736,1.823,10001.0,1.847,2.304,1.823,10001.0,3.909000,4.575000,3.839000
output_gpu,Efficientnet,Fused_Mul_Mul_ReduceSum_split_12912535945341581353_kernel,1001.0,8.121,10.848,8.031,1001.0,8.111,8.416,7.968,10001.0,14.877,17.823,14.399,10001.0,14.877,17.823,14.399,10001.0,4.331,7.264,4.255,10001.0,4.336,4.8,4.255,10001.0,6.458000,7.072000,6.367000
output_gpu,Efficientnet,Fused_LessEqual_Cast_Mul_Add_split_4676768287700606114_kernel,1001.0,46.356,51.679,45.471,1001.0,43.299,46.879,42.495,10001.0,43.114,48.896,42.431,10001.0,43.114,48.896,42.431,10001.0,43.137,49.087,42.431,10001.0,43.069,46.24,42.495,10001.0,45.461,48.799,44.512
output_gpu,bert_15cls,Fused_Reciprocal_split_9715664751193780275_kernel,1001.0,1.418,3.424,1.376,1001.0,1.223,1.7280000000000002,1.1840000000000002,10001.0,1.4169999999999998,3.488,1.375,10001.0,1.4169999999999998,3.488,1.375,10001.0,1.416,3.52,1.375,10001.0,1.421,3.584,1.375,10001.0,3.049000,3.744000,3.007000
output_gpu,bert_15cls,Fused_ReduceSum_Mul_split_14504185833015688388_kernel,1001.0,2.385,4.768,2.367,1001.0,2.056,2.944,2.016,10001.0,2.094,4.384,2.079,10001.0,2.094,4.384,2.079,10001.0,2.093,4.32,2.079,10001.0,2.097,8.608,2.079,10001.0,3.790000,5.120000,3.743000
output_gpu,bert_15cls,Fused_Add_Reshape_Reshape_Add_split_14093257215561905173_kernel,1001.0,49.28,53.791,48.319,1001.0,47.424,53.951,46.944,10001.0,47.41,54.143,46.751000000000005,10001.0,47.41,54.143,46.751000000000005,10001.0,47.408,53.279,46.783,10001.0,47.777,838.582,46.944,10001.0,49.552,54.272,48.256
output_gpu,bert_15cls,Fused_Reshape_LessEqual_Sub_LessEqual_LogicalOr_Select_Mul_Maximum_Select_fusion_1537099880519983217_kernel,1001.0,1.807,2.624,1.791,1001.0,1.807,2.623,1.791,10001.0,1.806,4.288,1.791,10001.0,1.806,4.288,1.791,10001.0,1.806,8.96,1.791,10001.0,1.55,2.496,1.535,10001.0,3.387000,4.352000,3.327000
output_gpu,bert_15cls,Fused_Mul_Mul_ReduceSum_Mul_split_7563720092882377888_kernel,1001.0,6.41,8.736,6.24,1001.0,6.42,7.263999999999999,6.24,10001.0,2.676,6.4,2.655,10001.0,2.676,6.4,2.655,10001.0,2.675,5.6,2.655,10001.0,2.676,10.144,2.655,10001.0,4.361000,5.344000,4.287000
output_gpu,bert_15cls,Fused_Mul_Mul_ReduceSum_Mul_split_4931910940408955701_kernel,1001.0,6.444,8.863999999999999,6.4,1001.0,5.537000000000001,6.4,5.44,10001.0,2.496,8.128,2.463,10001.0,2.496,8.128,2.463,10001.0,2.494,5.504,2.463,10001.0,2.136,3.04,2.111,10001.0,4.134000,5.408000,4.063000
output_gpu,bert_15cls,Fused_ReduceSum_Mul_split_847463926504720470_kernel,1001.0,18.026,31.008000000000003,17.727999999999998,1001.0,17.517,27.167,17.247,10001.0,17.942999999999998,28.927,17.184,10001.0,17.942999999999998,28.927,17.184,10001.0,17.954,27.968000000000004,17.247,10001.0,17.718,29.536,16.96,10001.0,19.943000,28.608,19.455000
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_779582814477784037_kernel,1001.0,1.654,4.448,1.631,1001.0,1.653,1.984,1.631,10001.0,1.652,4.768,1.631,10001.0,1.652,4.768,1.631,10001.0,1.652,4.16,1.631,10001.0,1.655,4.384,1.631,10001.0,3.683000,4.448000,3.616000
output_gpu,bert_15cls,Fused_Sub_Mul_Mul_split_14426505140340145857_kernel,1001.0,94.033,98.974,93.438,1001.0,91.432,95.327,90.751,10001.0,91.468,97.758,90.718,10001.0,91.468,97.758,90.718,10001.0,91.459,97.823,90.622,10001.0,91.641,784.055,90.751,10001.0,93.779,98.43100000000001,92.896
output_gpu,bert_15cls,Fused_Mul_Mul_Add_Add_Mul_Mul_ReduceSum_split_7548525193193236377_kernel,1001.0,1327.318,1611.4029999999998,848.693,1001.0,1312.48,1917.156,754.71,10001.0,88.00299999999999,93.15,85.758,10001.0,88.00299999999999,93.15,85.758,10001.0,88.39200000000001,93.215,86.814,10001.0,88.311,93.887,86.751,10001.0,90.87,95.13499999999999,89.119000
output_gpu,bert_15cls,Fused_Sub_Exp_ReduceSum_split_10845074449741576939_kernel,1001.0,86.585,95.582,85.40700000000001,1001.0,86.565,92.287,85.279,10001.0,81.594,96.766,77.726,10001.0,81.594,96.766,77.726,10001.0,81.413,97.086,77.759,10001.0,79.593,807.895,77.951,10001.0,85.453,93.023,83.679000
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_12101948072875812668_kernel,1001.0,1.222,2.9760000000000004,1.1840000000000002,1001.0,1.223,1.664,1.1840000000000002,10001.0,1.209,3.04,1.183,10001.0,1.209,3.04,1.183,10001.0,1.208,1.632,1.183,10001.0,1.04,9.344,0.992,10001.0,3.059000,3.777000,3.007000
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_3729463149435263926_kernel,1001.0,1.056,3.648,1.023,1001.0,1.055,1.376,1.023,10001.0,1.056,4.0,1.023,10001.0,1.056,4.0,1.023,10001.0,1.058,3.424,1.023,10001.0,1.053,3.648,1.023,10001.0,2.988000,3.456000,2.912000
output_gpu,bert_15cls,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_17865465815588720940_kernel,1001.0,18.542,27.616,18.08,1001.0,17.929000000000002,28.096,17.504,10001.0,23.154,32.64,21.472,10001.0,23.154,32.64,21.472,10001.0,24.147,32.607,21.983,10001.0,24.43,712.536,21.952,10001.0,26.605000,32.64,24.288000
output_gpu,bert_15cls,Fused_Add_fusion_11383007565367662187_kernel,1001.0,1.371,3.392,1.3430000000000002,1001.0,1.369,1.888,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.392,1.343,10001.0,1.175,1.664,1.151,10001.0,3.240000,3.968000,3.167000
output_gpu,bert_15cls,Fused_Mul_Mul_ReduceSum_Mul_split_7030311586437121275_kernel,1001.0,6.581,9.056,6.559,1001.0,5.657,6.5920000000000005,5.631,10001.0,2.464,4.832,2.431,10001.0,2.464,4.832,2.431,10001.0,2.463,4.992,2.431,10001.0,2.112,2.88,2.079,10001.0,4.107000,5.120000,4.031000
output_gpu,bert_15cls,Fused_LogicalNot_LogicalAnd_Mul_Select_Assign_fusion_7177650377850726476_kernel,1001.0,1.587,3.712,1.567,1001.0,1.479,2.112,1.471,10001.0,1.396,2.048,1.375,10001.0,1.396,2.048,1.375,10001.0,1.627,3.84,1.599,10001.0,1.405,9.376,1.375,10001.0,3.471000,4.352000,3.423000
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_15466795178772919507_kernel,1001.0,1.141,3.968,1.119,1001.0,1.141,3.872,1.119,10001.0,1.141,4.128,1.119,10001.0,1.141,4.128,1.119,10001.0,1.139,3.552,1.119,10001.0,0.989,1.472,0.959,10001.0,3.098000,3.584000,3.039000
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.224,2.9760000000000004,1.183,1001.0,1.224,1.952,1.1840000000000002,10001.0,1.223,8.256,1.183,10001.0,1.223,8.256,1.183,10001.0,1.223,3.36,1.183,10001.0,1.207,2.976,1.183,10001.0,3.069000,3.552000,3.007000
output_gpu,bert_15cls,Fused_Mul_Mul_Add_split_11808595724686856598_kernel,1001.0,36.674,41.631,36.063,1001.0,33.909,36.992,33.343,10001.0,34.145,40.415,33.279,10001.0,34.145,40.415,33.279,10001.0,34.154,39.647,33.279,10001.0,32.486000000000004,36.512,31.648000000000003,10001.0,36.201,40.0,35.583
output_gpu,bert_15cls,Fused_DropoutGrad_17130499691191415029_kernel,1001.0,1.3869999999999998,4.128,1.344,1001.0,1.384,1.76,1.344,10001.0,1.385,4.6080000000000005,1.344,10001.0,1.385,4.6080000000000005,1.344,10001.0,1.384,3.84,1.344,10001.0,1.384,1.856,1.344,10001.0,3.409000,3.968000,3.329000
output_gpu,bert_15cls,Fused_Reshape_ReduceSum_split_6969894200064021905_kernel,1001.0,66.408,72.063,64.79899999999999,1001.0,66.117,71.583,64.767,10001.0,16.509,24.608,15.967,10001.0,16.509,24.608,15.967,10001.0,16.518,24.928,15.999,10001.0,16.289,22.496,15.808,10001.0,18.741000,24.8,18.208000
output_gpu,bert_15cls,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_15211520463956997496_kernel,1001.0,1.379,4.032,1.3430000000000002,1001.0,1.376,1.727,1.3430000000000002,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.376,3.808,1.343,10001.0,1.376,1.696,1.343,10001.0,3.346000,3.872000,3.295000
output_gpu,bert_15cls,Fused_ReduceSum_split_15255374000456318533_kernel,1001.0,2.77,5.664,2.72,1001.0,2.7710000000000004,6.015,2.72,10001.0,1.805,4.832,1.791,10001.0,1.805,4.832,1.791,10001.0,1.529,4.192,1.503,10001.0,1.318,8.416,1.279,10001.0,3.260000,3.808000,3.199000
output_gpu,bert_15cls,Fused_Transpose_split_6974001358186712233_kernel,1001.0,36.614,41.887,35.743,1001.0,37.171,41.983,36.319,10001.0,33.764,39.84,32.864000000000004,10001.0,33.764,39.84,32.864000000000004,10001.0,33.944,39.487,33.151,10001.0,75.131,86.23899999999999,71.071,10001.0,80.824,81.663,80.447
output_gpu,bert_15cls,Fused_BroadcastTo_inplace_assign_builder_2838470725666470184_kernel,1001.0,1.225,2.9760000000000004,1.183,1001.0,1.224,1.6,1.1840000000000002,10001.0,1.225,3.168,1.183,10001.0,1.225,3.168,1.183,10001.0,1.224,3.04,1.183,10001.0,1.223,1.536,1.183,10001.0,3.083000,3.712000,3.007000
output_gpu,bert_15cls,Fused_ReduceSum_split_16448887507098774487_kernel,1001.0,66.32,71.551,64.831,1001.0,66.066,71.455,64.991,10001.0,16.532999999999998,24.864,16.031,10001.0,16.532999999999998,24.864,16.031,10001.0,16.52,24.576,16.0,10001.0,16.296,22.112,15.807,10001.0,18.473000,24.416,17.951000
output_gpu,bert_15cls,Fused_GeLU_12768812415171470673_kernel,1001.0,130.792,136.766,130.20600000000002,1001.0,130.847,134.399,130.334,10001.0,130.404,136.637,129.725,10001.0,130.404,136.637,129.725,10001.0,130.355,136.221,129.725,10001.0,130.55,776.919,129.63,10001.0,133.08,137.407,132.223
output_gpu,bert_15cls,Fused_Reshape_ReduceSum_split_601052953231422684_kernel,1001.0,66.303,73.183,64.79899999999999,1001.0,66.102,71.423,64.767,10001.0,16.552999999999994,24.415,15.968,10001.0,16.552999999999994,24.415,15.968,10001.0,16.493000000000002,24.384,15.968,10001.0,16.29,22.528,15.84,10001.0,18.504000,24.096,17.983000
output_gpu,bert_15cls,Fused_Reshape_Transpose_fusion_9854266247006671863_kernel,1001.0,35.477,40.511,34.848,1001.0,36.709,40.99100000000001,35.424,10001.0,32.984,40.0,32.319,10001.0,32.984,40.0,32.319,10001.0,33.006,39.008,32.224000000000004,10001.0,71.24900000000001,71.967,71.071,10001.0,80.732,83.136,80.383
output_gpu,bert_15cls,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.423,3.552,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.776,1.407,10001.0,1.229,1.728,1.215,10001.0,3.272000,4.288000,3.199000
output_gpu,bert_15cls,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_13095791597391198535_kernel,1001.0,1.379,4.064,1.3430000000000002,1001.0,1.379,4.096,1.3430000000000002,10001.0,1.3769999999999998,4.416,1.3430000000000002,10001.0,1.3769999999999998,4.416,1.3430000000000002,10001.0,1.376,3.808,1.343,10001.0,1.377,4.352,1.343,10001.0,3.351000,3.872000,3.295000
output_gpu,bert_15cls,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Add_Mul_R_more_split_6057875861280764015_kernel,1001.0,2526.303,3431.605,1701.802,1001.0,2527.219,2901.046,1762.95,10001.0,211.231,220.124,206.012,10001.0,211.231,220.124,206.012,10001.0,221.909,888.88,216.476,10001.0,221.788,1023.828,217.053,10001.0,225.72400000000002,232.958,221.374000
output_gpu,bert_15cls,Fused_Cast_Greater_Cast_Sub_Minimum_Mul_Sub_Mul_Add_Mul_Minimum_Cast_Mul_Mul_Add_split_5060824047314001111_kernel,1001.0,1.404,3.392,1.375,1001.0,1.403,1.952,1.375,10001.0,1.403,3.488,1.375,10001.0,1.403,3.488,1.375,10001.0,1.402,3.488,1.375,10001.0,1.402,1.952,1.375,10001.0,3.271000,4.000000,3.199000
output_gpu,bert_15cls,Fused_Reshape_Cast_Reshape_Sub_Mul_split_8135735155960499556_kernel,1001.0,1.247,3.968,1.215,1001.0,1.245,1.6,1.215,10001.0,1.242,4.448,1.215,10001.0,1.242,4.448,1.215,10001.0,1.241,3.68,1.215,10001.0,1.068,1.664,1.024,10001.0,3.001000,3.488000,2.943000
output_gpu,bert_15cls,Fused_RealDiv_Mul_LessEqual_Cast_Mul_split_1307138256337638741_kernel,1001.0,164.53799999999998,170.43,163.454,1001.0,159.39700000000002,163.19799999999998,158.27,10001.0,159.84599999999998,166.493,158.333,10001.0,159.84599999999998,166.493,158.333,10001.0,159.855,165.75699999999995,158.365,10001.0,159.997,854.87,158.526,10001.0,161.462,165.982,160.06300000000002
output_gpu,bert_15cls,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_7566706435819628490_kernel,1001.0,18.539,28.032,18.016,1001.0,17.922,24.831,17.535999999999998,10001.0,24.261,31.551,21.888,10001.0,24.261,31.551,21.888,10001.0,23.546,33.087,21.855,10001.0,23.681,33.792,21.792,10001.0,25.210000,32.608,24.064000
output_gpu,bert_15cls,Fused_Mul_Mul_Mul_ReduceSum_split_592974943113998615_kernel,1001.0,126.865,133.662,125.502,1001.0,127.44,135.39,126.174,10001.0,127.304,133.822,125.341,10001.0,127.304,133.822,125.341,10001.0,127.347,132.253,125.598,10001.0,127.62,867.03,126.079,10001.0,128.869,138.079,127.007000
output_gpu,bert_15cls,Fused_Mul_Mul_Add_Mul_LessEqual_Cast_Mul_split_9452475813991627793_kernel,1001.0,68.126,74.399,67.391,1001.0,65.624,68.863,65.023,10001.0,65.72399999999999,72.79899999999999,64.83,10001.0,65.72399999999999,72.79899999999999,64.83,10001.0,65.736,71.455,64.959,10001.0,63.754000000000005,69.759,62.78300000000001,10001.0,68.033,72.863,67.168
output_gpu,bert_15cls,Fused_Tanh_Dropout_fusion_15505905983801999224_kernel,1001.0,1.645,4.384,1.599,1001.0,1.396,1.888,1.375,10001.0,1.643,4.672,1.599,10001.0,1.643,4.672,1.599,10001.0,1.643,4.544,1.599,10001.0,1.652,4.672,1.631,10001.0,3.424000,3.968000,3.359000
output_gpu,bert_15cls,Fused_Mul_Mul_ReduceSum_Mul_split_18046230714060098124_kernel,1001.0,2.149,4.352,2.111,1001.0,1.844,2.592,1.823,10001.0,2.147,4.64,2.111,10001.0,2.147,4.64,2.111,10001.0,2.146,4.512,2.111,10001.0,1.839,2.56,1.823,10001.0,3.744000,4.768000,3.679000
output_gpu,bert_15cls,Fused_Mul_Add_ReduceMax_split_14375517492105268498_kernel,1001.0,86.25299999999999,95.007,85.119,1001.0,80.667,86.334,78.911,10001.0,80.234,94.75,76.607,10001.0,80.234,94.75,76.607,10001.0,80.65100000000001,94.942,76.703,10001.0,78.56299999999999,85.18299999999999,76.799,10001.0,86.177,98.239,83.328000
output_gpu,bert_15cls,Fused_Reshape_Transpose_fusion_13614983290056184942_kernel,1001.0,35.476,41.152,34.816,1001.0,36.709,43.103,35.615,10001.0,32.99,39.744,32.287000000000006,10001.0,32.99,39.744,32.287000000000006,10001.0,33.011,38.943000000000005,32.288000000000004,10001.0,74.41399999999999,702.808,71.071,10001.0,80.76700000000001,81.72699999999999,79.775
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_14896484744391315230_kernel,1001.0,1.056,3.648,1.023,1001.0,0.97,1.344,0.928,10001.0,1.055,3.808,1.023,10001.0,1.055,3.808,1.023,10001.0,1.059,3.712,1.023,10001.0,1.069,13.151,1.023,10001.0,2.814000,4.288000,2.720000
output_gpu,inceptionv3,Fused_BiasAdd_5837691742273487135_kernel,1001.0,1587.036,1614.6989999999998,1568.2679999999998,1001.0,596.768,603.16,592.76,10001.0,597.7719999999998,606.0369999999999,593.109,10001.0,597.7719999999998,606.0369999999999,593.109,10001.0,597.74,605.2689999999999,593.238,10001.0,597.834,1466.381,592.825,10001.0,596.255,602.043,594.171
output_gpu,inceptionv3,Fused_Mul_Add_fusion_2283776153684860403_kernel,1001.0,8.872,12.063,8.544,1001.0,8.881,9.824,8.48,10001.0,8.872,12.128,8.448,10001.0,8.872,12.128,8.448,10001.0,8.893,12.0,8.352,10001.0,7.163,11.392,6.144,10001.0,10.297000,11.584000,9.728000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_9960847462240094894_kernel,1001.0,1.42,4.0,1.4069999999999998,1001.0,1.433,4.064,1.4069999999999998,10001.0,1.418,4.096,1.376,10001.0,1.418,4.096,1.376,10001.0,1.418,4.063,1.375,10001.0,1.426,4.032,1.407,10001.0,3.281000,4.800000,3.200000
output_gpu,inceptionv3,Fused_AddN_15859381231572957417_kernel,1001.0,739.735,743.4789999999999,734.551,1001.0,744.35,1454.7,740.118,10001.0,741.462,744.306,734.6419999999999,10001.0,741.462,744.306,734.6419999999999,10001.0,741.433,743.8589999999999,734.707,10001.0,731.604,1629.45,723.158,10001.0,738.9010000000001,744.794,737.049
output_gpu,inceptionv3,Fused_Mul_Add_split_11364688680250034940_kernel,1001.0,1.406,3.264,1.375,1001.0,1.401,3.2960000000000003,1.375,10001.0,1.404,3.36,1.375,10001.0,1.404,3.36,1.375,10001.0,1.404,3.36,1.375,10001.0,1.396,3.488,1.375,10001.0,3.256000,3.808000,3.199000
output_gpu,inceptionv3,Fused_BiasAdd_3102609220563247185_kernel,1001.0,43.14,47.744,42.527,1001.0,40.104,46.528,39.551,10001.0,40.069,46.623000000000005,39.2,10001.0,40.069,46.623000000000005,39.2,10001.0,40.07100000000001,45.855,39.295,10001.0,40.072,43.232,39.295,10001.0,42.559,46.718999999999994,41.504
output_gpu,inceptionv3,Fused_Dropout_2321807970646948512_kernel,1001.0,3.3110000000000004,6.432,3.2,1001.0,3.293,6.656000000000001,3.168,10001.0,3.284,6.624,3.167,10001.0,3.284,6.624,3.167,10001.0,3.285,6.527,3.167,10001.0,3.269,9.632,3.136,10001.0,5.522000,6.176000,5.376000
output_gpu,inceptionv3,Fused_Mul_Add_split_8250471626896121664_kernel,1001.0,1.42,4.0,1.376,1001.0,1.4269999999999998,1.664,1.4069999999999998,10001.0,1.4169999999999998,4.224,1.375,10001.0,1.4169999999999998,4.224,1.375,10001.0,1.418,4.064,1.376,10001.0,1.426,4.032,1.407,10001.0,3.279000,3.744000,3.200000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_17453393947342062252_kernel,1001.0,1.238,3.936,1.215,1001.0,1.238,1.6,1.215,10001.0,1.236,4.032,1.215,10001.0,1.236,4.032,1.215,10001.0,1.236,4.032,1.215,10001.0,1.238,4.0,1.215,10001.0,3.189000,3.840000,3.135000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_16745005559324775081_kernel,1001.0,2.175,5.12,2.143,1001.0,2.152,5.184,2.111,10001.0,2.168,5.184,2.112,10001.0,2.168,5.184,2.112,10001.0,2.169,5.151,2.112,10001.0,2.137,5.152,2.111,10001.0,3.957000,4.640000,3.871000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_17179616771878150927_kernel,1001.0,3.553,6.72,3.4560000000000004,1001.0,3.549,4.32,3.4560000000000004,10001.0,3.542,6.816,3.455,10001.0,3.542,6.816,3.455,10001.0,3.543,6.848,3.455,10001.0,2.798,6.112,2.688,10001.0,5.301000,6.368000,5.183000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_15729480271255155814_kernel,1001.0,1.263,4.32,1.247,1001.0,1.091,1.376,1.056,10001.0,1.259,3.84,1.247,10001.0,1.259,3.84,1.247,10001.0,1.259,3.84,1.247,10001.0,1.259,13.536,1.247,10001.0,2.903000,4.160000,2.816000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_8261444986825493822_kernel,1001.0,1.262,3.712,1.247,1001.0,1.257,1.44,1.247,10001.0,1.26,3.776,1.247,10001.0,1.26,3.776,1.247,10001.0,1.26,3.808,1.247,10001.0,1.269,9.984,1.247,10001.0,3.087000,3.520000,3.007000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_13284447704307833124_kernel,1001.0,1.242,3.936,1.215,1001.0,1.246,2.3040000000000003,1.215,10001.0,1.24,4.031000000000001,1.215,10001.0,1.24,4.031000000000001,1.215,10001.0,1.241,4.032,1.215,10001.0,1.246,12.96,1.215,10001.0,2.955000,3.489000,2.911000
output_gpu,inceptionv3,Fused_BiasAdd_15937039249418007474_kernel,1001.0,170.217,175.454,168.894,1001.0,156.781,159.454,154.814,10001.0,156.625,160.989,154.749,10001.0,156.625,160.989,154.749,10001.0,156.726,160.893,154.621,10001.0,156.809,870.1659999999999,154.68599999999998,10001.0,158.443,164.191,157.24699999999999
output_gpu,inceptionv3,Fused_Mul_Add_fusion_5956603155092161569_kernel,1001.0,2.551,5.599,2.464,1001.0,2.544,5.6,2.463,10001.0,2.548,5.696000000000001,2.463,10001.0,2.548,5.696000000000001,2.463,10001.0,2.548,5.664,2.463,10001.0,2.333,5.472,2.303,10001.0,4.408000,5.056000,4.287000
output_gpu,inceptionv3,Fused_ReduceSum_split_14908857373033220908_kernel,1001.0,36.87,48.287,36.0,1001.0,36.816,48.256,36.031,10001.0,34.723,45.535,34.208,10001.0,34.723,45.535,34.208,10001.0,34.672000000000004,45.43899999999999,34.143,10001.0,34.647,45.984,34.111,10001.0,36.609,45.504000000000005,36.191000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_12183649897023064181_kernel,1001.0,2.001,4.864,1.952,1001.0,1.977,4.992,1.951,10001.0,1.995,5.055,1.951,10001.0,1.995,5.055,1.951,10001.0,1.996,4.992,1.951,10001.0,2.007,9.696,1.952,10001.0,4.088000,4.768000,4.000000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_3190020807321351807_kernel,1001.0,1.673,4.416,1.632,1001.0,1.635,2.048,1.599,10001.0,1.67,4.512,1.631,10001.0,1.67,4.512,1.631,10001.0,1.67,4.512,1.631,10001.0,1.633,4.48,1.599,10001.0,3.426000,3.968000,3.359000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_7272007293007730701_kernel,1001.0,1.054,3.648,1.023,1001.0,1.062,3.648,1.023,10001.0,1.055,3.936,1.023,10001.0,1.055,3.936,1.023,10001.0,1.055,3.712,1.023,10001.0,1.076,12.319,1.023,10001.0,2.980000,3.457000,2.911000
output_gpu,inceptionv3,Fused_AddN_12911758997986757343_kernel,1001.0,697.952,701.56,693.143,1001.0,697.936,701.75,693.5269999999999,10001.0,699.324,701.6510000000002,693.235,10001.0,699.324,701.6510000000002,693.235,10001.0,699.3380000000001,701.747,693.075,10001.0,690.355,692.759,682.615,10001.0,697.133,699.962,692.058
output_gpu,inceptionv3,Fused_Mul_Add_fusion_7307387026351365000_kernel,1001.0,1.4509999999999998,3.999,1.439,1001.0,1.445,1.7919999999999998,1.408,10001.0,1.449,4.096,1.408,10001.0,1.449,4.096,1.408,10001.0,1.449,4.096,1.408,10001.0,1.445,4.032,1.407,10001.0,3.083000,3.520000,3.007000
output_gpu,inceptionv3,Fused_BiasAdd_916061431518297416_kernel,1001.0,70.258,75.10300000000001,69.631,1001.0,65.15,68.895,64.543,10001.0,65.20100000000001,70.367,64.319,10001.0,65.20100000000001,70.367,64.319,10001.0,65.203,70.815,64.28699999999999,10001.0,65.213,71.423,64.351,10001.0,67.444,71.29599999999999,66.463
output_gpu,inceptionv3,Fused_Sub_Exp_ReduceSum_split_13933643594187294147_kernel,1001.0,7.555,10.272,7.487999999999999,1001.0,7.539,10.367,7.423999999999999,10001.0,3.083,6.752000000000001,3.039,10001.0,3.083,6.752000000000001,3.039,10001.0,3.084,6.816,3.039,10001.0,3.09,6.912,3.039,10001.0,4.743000,5.696000,4.671000
output_gpu,inceptionv3,Fused_Add_Add_Add_split_9026293421852915843_kernel,1001.0,258.43,264.22,257.341,1001.0,258.877,264.733,257.66,10001.0,259.688,264.027,257.243,10001.0,259.688,264.027,257.243,10001.0,259.717,264.155,257.308,10001.0,256.781,260.732,253.309,10001.0,260.359,264.317,258.43
output_gpu,inceptionv3,Fused_BiasAdd_9136358221751171452_kernel,1001.0,161.468,168.894,157.31,1001.0,111.101,114.494,110.494,10001.0,110.792,116.574,109.982,10001.0,110.792,116.574,109.982,10001.0,110.809,116.382,109.886,10001.0,110.796,116.767,109.951,10001.0,112.803,116.319,111.711
output_gpu,inceptionv3,Fused_Mul_Add_fusion_18058537793465107034_kernel,1001.0,2.175,5.12,2.08,1001.0,2.17,5.184,2.08,10001.0,2.168,5.247999999999999,2.08,10001.0,2.168,5.247999999999999,2.08,10001.0,2.169,5.215,2.08,10001.0,2.094,5.216,2.047,10001.0,4.060000,4.704000,3.967000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14567174840840160391_kernel,1001.0,2.9760000000000004,6.4,2.8480000000000003,1001.0,2.948,3.84,2.8480000000000003,10001.0,2.975,8.8,2.8480000000000003,10001.0,2.975,8.8,2.8480000000000003,10001.0,2.972,6.272,2.848,10001.0,2.623,6.016,2.591,10001.0,5.166000,5.888000,5.023000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_5835276512760675131_kernel,1001.0,1.054,3.648,1.023,1001.0,1.062,3.999,1.024,10001.0,1.057,3.712,1.023,10001.0,1.057,3.712,1.023,10001.0,1.055,3.712,1.023,10001.0,1.077,12.928,1.023,10001.0,2.978000,3.456000,2.911000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_1705976372344909764_kernel,1001.0,3.634,6.816,3.551,1001.0,3.635,4.256,3.551,10001.0,3.629,7.104,3.52,10001.0,3.629,7.104,3.52,10001.0,3.631,8.128,3.519,10001.0,2.814,6.08,2.719,10001.0,5.398000,6.592000,4.544000
output_gpu,inceptionv3,Fused_Mul_Add_split_14033070269716264173_kernel,1001.0,1.237,4.0,1.215,1001.0,1.24,3.968,1.215,10001.0,1.235,4.16,1.215,10001.0,1.235,4.16,1.215,10001.0,1.235,4.0,1.215,10001.0,1.237,4.0,1.215,10001.0,3.179000,4.064000,3.135000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_13239088336427658660_kernel,1001.0,1.435,4.0,1.4069999999999998,1001.0,1.433,1.664,1.4069999999999998,10001.0,1.432,4.032,1.4069999999999998,10001.0,1.432,4.032,1.4069999999999998,10001.0,1.432,4.032,1.407,10001.0,1.428,4.224,1.407,10001.0,3.058000,3.488000,3.007000
output_gpu,inceptionv3,Fused_BiasAdd_13279601982287981454_kernel,1001.0,246.806,255.389,240.029,1001.0,165.753,168.893,165.149,10001.0,165.343,171.293,164.44500000000005,10001.0,165.343,171.293,164.44500000000005,10001.0,165.343,171.421,164.41299999999998,10001.0,165.349,169.47,164.41400000000002,10001.0,166.958,172.191,165.951
output_gpu,inceptionv3,Fused_Mul_Add_fusion_16592603670837833323_kernel,1001.0,1.4369999999999998,4.0,1.4069999999999998,1001.0,1.435,4.352,1.4069999999999998,10001.0,1.4340000000000002,4.063,1.4069999999999998,10001.0,1.4340000000000002,4.063,1.4069999999999998,10001.0,1.434,4.096,1.407,10001.0,1.428,4.032,1.407,10001.0,3.273000,3.936000,3.200000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_4902852373354436623_kernel,1001.0,1.266,3.744,1.247,1001.0,1.261,1.408,1.247,10001.0,1.263,3.808,1.247,10001.0,1.263,3.808,1.247,10001.0,1.263,3.808,1.247,10001.0,1.264,13.504,1.247,10001.0,3.076000,3.488000,3.008000
output_gpu,inceptionv3,Fused_ReduceSum_split_13799066988795076930_kernel,1001.0,5.026,8.031,4.9910000000000005,1001.0,4.9910000000000005,8.224,4.959,10001.0,2.506,5.312,2.464,10001.0,2.506,5.312,2.464,10001.0,1.82,4.736,1.791,10001.0,1.804,4.768,1.76,10001.0,3.580000,4.192000,3.519000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_10046278242332529249_kernel,1001.0,2.121,5.056,2.08,1001.0,2.087,2.752,2.048,10001.0,2.117,5.152,2.079,10001.0,2.117,5.152,2.079,10001.0,2.117,5.12,2.079,10001.0,2.097,9.792,2.047,10001.0,4.217000,4.928000,4.159000
output_gpu,inceptionv3,Fused_ReduceSum_split_16889214658628119107_kernel,1001.0,24.779,35.615,24.288,1001.0,24.842,36.127,24.32,10001.0,24.103,35.455,23.551,10001.0,24.103,35.455,23.551,10001.0,24.143,36.287,23.647,10001.0,24.165,35.551,23.679,10001.0,26.056000,36.0,25.632000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_12556238166537091384_kernel,1001.0,15.026,19.68,14.463,1001.0,15.012,17.792,14.464,10001.0,14.986,19.584,14.368,10001.0,14.986,19.584,14.368,10001.0,15.029000000000002,19.776,14.4,10001.0,14.488,19.808,13.952000000000002,10001.0,17.283,20.128,15.935
output_gpu,inceptionv3,Fused_Mul_Add_fusion_1814317443537360589_kernel,1001.0,1.325,4.064,1.311,1001.0,1.3259999999999998,1.7919999999999998,1.311,10001.0,1.3219999999999998,4.16,1.311,10001.0,1.3219999999999998,4.16,1.311,10001.0,1.323,4.16,1.311,10001.0,1.326,4.128,1.28,10001.0,3.116000,3.648000,3.039000
output_gpu,inceptionv3,Fused_BiasAdd_13149277205434948989_kernel,1001.0,85.28,90.143,84.51,1001.0,78.31,84.79899999999999,77.631,10001.0,78.433,84.542,77.342,10001.0,78.433,84.542,77.342,10001.0,78.455,85.054,77.439,10001.0,78.46799999999999,84.44699999999999,77.40700000000001,10001.0,80.596,84.0,79.61500000000001
output_gpu,inceptionv3,Fused_Add_Add_Add_split_17814005094904420575_kernel,1001.0,412.335,417.339,409.787,1001.0,412.768,1102.8010000000002,409.947,10001.0,413.674,418.648,409.6880000000001,10001.0,413.674,418.648,409.6880000000001,10001.0,413.61800000000005,416.696,409.6880000000001,10001.0,409.31,412.411,404.027,10001.0,413.719,416.252,410.461
output_gpu,inceptionv3,Fused_Mul_Add_fusion_16065661968153571171_kernel,1001.0,1.3019999999999998,4.0,1.279,1001.0,1.3119999999999998,4.256,1.279,10001.0,1.3,4.064,1.279,10001.0,1.3,4.064,1.279,10001.0,1.3,4.128,1.279,10001.0,1.304,4.096,1.248,10001.0,3.274000,3.808000,3.199000
output_gpu,inceptionv3,Fused_BiasAdd_8856965180129546514_kernel,1001.0,35.911,41.375,35.199,1001.0,32.751999999999995,39.136,32.319,10001.0,32.63,38.943000000000005,32.095,10001.0,32.63,38.943000000000005,32.095,10001.0,32.669000000000004,38.592,32.063,10001.0,32.599000000000004,1001.364,31.391,10001.0,34.912,39.424,33.728
output_gpu,inceptionv3,Fused_Mul_Add_split_1994986474691663287_kernel,1001.0,1.242,3.936,1.215,1001.0,1.243,3.968,1.215,10001.0,1.241,4.0,1.215,10001.0,1.241,4.0,1.215,10001.0,1.241,4.0,1.215,10001.0,1.241,4.032,1.215,10001.0,3.178000,4.160000,3.103000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_8413329165858166344_kernel,1001.0,1.324,4.064,1.311,1001.0,1.327,4.224,1.28,10001.0,1.3219999999999998,4.16,1.311,10001.0,1.3219999999999998,4.16,1.311,10001.0,1.322,4.128,1.311,10001.0,1.326,4.128,1.28,10001.0,3.340000,4.033000,3.264000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_4486789446329520211_kernel,1001.0,1.262,3.712,1.247,1001.0,1.263,4.096,1.247,10001.0,1.26,3.808,1.247,10001.0,1.26,3.808,1.247,10001.0,1.259,3.808,1.247,10001.0,1.258,7.423,1.247,10001.0,3.099000,3.616000,3.039000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_8327457158489234245_kernel,1001.0,1.435,4.0,1.4069999999999998,1001.0,1.429,1.664,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.433,4.095,1.407,10001.0,1.428,4.0,1.407,10001.0,3.277000,3.808000,3.200000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_513034331902445748_kernel,1001.0,12.726,16.672,11.935,1001.0,12.683,17.472,12.0,10001.0,12.7,16.735,12.128,10001.0,12.7,16.735,12.128,10001.0,12.7,16.831,12.032,10001.0,12.127,16.639999999999997,11.679,10001.0,14.918000,16.992000,13.792000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15235311143258630203_kernel,1001.0,13.029000000000002,17.183,12.448,1001.0,13.045,17.472,12.416,10001.0,13.038,17.791999999999994,12.48,10001.0,13.038,17.791999999999994,12.48,10001.0,13.04,17.087999999999994,12.544,10001.0,12.465,16.864,11.711,10001.0,14.964000,16.992000,14.336000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_13480478124256430834_kernel,1001.0,2.419,5.44,2.336,1001.0,2.404,2.9760000000000004,2.335,10001.0,2.4130000000000003,5.5360000000000005,2.3040000000000003,10001.0,2.4130000000000003,5.5360000000000005,2.3040000000000003,10001.0,2.412,5.664,2.335,10001.0,2.233,8.736,2.175,10001.0,4.232000,4.864000,4.127000
output_gpu,inceptionv3,Fused_BiasAdd_9958067436049620303_kernel,1001.0,40.331,45.023,39.807,1001.0,37.604,41.568000000000005,37.087,10001.0,37.704,44.191,37.055,10001.0,37.704,44.191,37.055,10001.0,37.724,44.223,37.119,10001.0,37.85,924.181,36.192,10001.0,39.940999999999995,44.096000000000004,39.231
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15228992338364253445_kernel,1001.0,10.981,13.407,10.4,1001.0,10.967,11.776,10.176,10001.0,10.970999999999998,13.376,10.496,10001.0,10.970999999999998,13.376,10.496,10001.0,10.967,13.504,10.208,10001.0,10.019,13.696,8.8,10001.0,12.999000,14.400000,12.448000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_3830299990378146088_kernel,1001.0,1.406,3.264,1.375,1001.0,1.396,1.7919999999999998,1.375,10001.0,1.404,3.36,1.375,10001.0,1.404,3.36,1.375,10001.0,1.404,3.327,1.375,10001.0,1.397,3.328,1.375,10001.0,3.263000,3.872000,3.199000
output_gpu,inceptionv3,Fused_Mul_Add_split_11056394319687388612_kernel,1001.0,1.422,3.968,1.4069999999999998,1001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.419,4.064,1.376,10001.0,1.419,4.064,1.376,10001.0,1.419,4.064,1.376,10001.0,1.428,4.032,1.407,10001.0,3.271000,3.744000,3.200000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_6569717702760831934_kernel,1001.0,3.772,7.04,3.68,1001.0,3.756,4.416,3.679,10001.0,3.767,7.007999999999999,3.679,10001.0,3.767,7.007999999999999,3.679,10001.0,3.768,7.072,3.648,10001.0,2.956,6.368,2.848,10001.0,5.595000,6.688000,5.471000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_2000949605676586887_kernel,1001.0,1.241,3.936,1.215,1001.0,1.237,1.6,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.0,1.215,10001.0,1.237,4.0,1.215,10001.0,2.969000,3.808000,2.911000
output_gpu,inceptionv3,Fused_Mul_Add_split_6892298629778330308_kernel,1001.0,1.435,4.0,1.4069999999999998,1001.0,1.43,4.032,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.432,4.256,1.407,10001.0,1.428,4.032,1.407,10001.0,3.282000,4.320000,3.199000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_6102306437225920894_kernel,1001.0,2.123,5.056,2.079,1001.0,2.091,2.624,2.047,10001.0,2.1180000000000003,5.184,2.079,10001.0,2.1180000000000003,5.184,2.079,10001.0,2.119,8.543,2.079,10001.0,2.087,5.056,2.047,10001.0,3.934000,5.216000,3.871000
output_gpu,inceptionv3,Fused_Add_Add_Add_split_2909241112688613299_kernel,1001.0,985.299,988.82,977.94,1001.0,987.128,1812.518,978.61,10001.0,986.579,989.773,977.965,10001.0,986.579,989.773,977.965,10001.0,986.572,989.038,977.806,10001.0,974.076,1705.546,962.835,10001.0,983.011,987.0640000000001,976.952
output_gpu,inceptionv3,Fused_ReduceSum_split_6643930332904336521_kernel,1001.0,39.623000000000005,50.07899999999999,37.984,1001.0,39.595,47.327,38.08,10001.0,36.545,48.639,35.935,10001.0,36.545,48.639,35.935,10001.0,36.517,48.895,35.935,10001.0,36.498000000000005,48.703,35.872,10001.0,38.568,47.808,38.112000
output_gpu,inceptionv3,Fused_ReduceSum_split_14089211308293203352_kernel,1001.0,20.634,31.039,20.0,1001.0,20.609,30.943,20.127,10001.0,18.891,29.6,18.335,10001.0,18.891,29.6,18.335,10001.0,18.907,29.663,18.368,10001.0,18.823,29.568,18.367,10001.0,20.748000,29.950999999999997,20.383000
output_gpu,inceptionv3,Fused_BiasAdd_3734067456933364732_kernel,1001.0,72.168,76.703,70.655,1001.0,56.724,60.575,56.223,10001.0,56.547,62.111,55.775,10001.0,56.547,62.111,55.775,10001.0,56.562,62.303,55.839,10001.0,56.57,768.0219999999999,55.776,10001.0,58.465999999999994,62.431,57.76
output_gpu,inceptionv3,Fused_Mul_Add_fusion_3488738075935366573_kernel,1001.0,2.285,5.408,2.239,1001.0,2.244,2.8160000000000003,2.207,10001.0,2.278,5.247999999999999,2.239,10001.0,2.278,5.247999999999999,2.239,10001.0,2.279,5.344,2.239,10001.0,2.243,5.216,2.207,10001.0,4.369000,5.120000,4.288000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_16901929356243398884_kernel,1001.0,1.422,4.0,1.375,1001.0,1.4340000000000002,4.319,1.4069999999999998,10001.0,1.42,4.032,1.376,10001.0,1.42,4.032,1.376,10001.0,1.42,4.032,1.376,10001.0,1.428,4.064,1.407,10001.0,3.043000,3.520000,3.007000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15213339450931162760_kernel,1001.0,2.418,5.631,2.335,1001.0,2.404,3.04,2.335,10001.0,2.414,5.568,2.3040000000000003,10001.0,2.414,5.568,2.3040000000000003,10001.0,2.412,5.696,2.304,10001.0,2.23,9.728,2.176,10001.0,4.250000,4.897000,4.097000
output_gpu,inceptionv3,Fused_BiasAdd_11245559101894879851_kernel,1001.0,1278.286,1290.8,1265.904,1001.0,975.223,1644.489,969.33,10001.0,973.727,978.829,963.374,10001.0,973.727,978.829,963.374,10001.0,973.758,979.279,962.286,10001.0,974.009,1920.935,964.148,10001.0,972.386,978.52,962.328
output_gpu,inceptionv3,Fused_Mul_Add_split_13183654708381953577_kernel,1001.0,1.436,4.096,1.4069999999999998,1001.0,1.431,4.351,1.4069999999999998,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.434,4.064,1.407,10001.0,1.428,4.032,1.407,10001.0,3.274000,3.744000,3.200000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_12548378208067680142_kernel,1001.0,2.609,5.632000000000001,2.527,1001.0,2.605,3.488,2.527,10001.0,2.605,5.759,2.495,10001.0,2.605,5.759,2.495,10001.0,2.603,5.856,2.496,10001.0,2.379,9.728,2.335,10001.0,4.804000,5.536000,4.640000
output_gpu,inceptionv3,Fused_BiasAdd_14475292599214658215_kernel,1001.0,18.346,22.464,17.791,1001.0,17.093,21.632,16.64,10001.0,17.2,24.255,16.608,10001.0,17.2,24.255,16.608,10001.0,17.218,24.159,16.608,10001.0,17.227,23.744,16.735,10001.0,19.382,23.808,18.784
output_gpu,inceptionv3,Fused_Mul_Add_split_16999387465648136579_kernel,1001.0,1.237,4.096,1.215,1001.0,1.24,3.968,1.215,10001.0,1.235,3.999,1.215,10001.0,1.235,3.999,1.215,10001.0,1.237,4.0,1.215,10001.0,1.237,4.0,1.215,10001.0,3.174000,3.712000,3.135000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14988762206562469577_kernel,1001.0,16.313,21.6,15.68,1001.0,16.305,19.071,15.871,10001.0,16.34,21.856,15.839,10001.0,16.34,21.856,15.839,10001.0,16.334999999999994,21.791,15.808,10001.0,15.752,21.376,15.232,10001.0,18.674,21.695,17.695
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.224,3.008,1.183,1001.0,1.221,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.072,1.183,10001.0,1.203,3.04,1.183,10001.0,2.884000,3.393000,2.815000
output_gpu,inceptionv3,Fused_BiasAdd_15730224028876181167_kernel,1001.0,62.231,67.807,61.599,1001.0,53.274,56.767,52.351000000000006,10001.0,53.221,58.719,52.127,10001.0,53.221,58.719,52.127,10001.0,53.221,60.223,52.095,10001.0,53.365,763.094,51.743,10001.0,54.856,58.815,53.983000000000004
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15705708750703986266_kernel,1001.0,1.3219999999999998,4.064,1.28,1001.0,1.32,1.664,1.28,10001.0,1.32,4.128,1.279,10001.0,1.32,4.128,1.279,10001.0,1.321,4.288,1.279,10001.0,1.322,8.672,1.279,10001.0,3.315000,3.872000,3.231000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_4261862423109646487_kernel,1001.0,1.063,3.648,1.024,1001.0,1.064,4.032,1.023,10001.0,1.064,3.807,1.023,10001.0,1.064,3.807,1.023,10001.0,1.064,3.776,1.023,10001.0,1.069,9.728,1.023,10001.0,2.995000,3.457000,2.943000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_11848777849059956305_kernel,1001.0,1.24,3.744,1.215,1001.0,1.093,1.408,1.055,10001.0,1.238,3.808,1.215,10001.0,1.238,3.808,1.215,10001.0,1.237,3.776,1.215,10001.0,1.266,12.864,1.247,10001.0,2.907000,3.296000,2.815000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15203407968642644530_kernel,1001.0,1.265,3.968,1.247,1001.0,1.26,1.632,1.247,10001.0,1.263,4.064,1.247,10001.0,1.263,4.064,1.247,10001.0,1.262,4.064,1.247,10001.0,1.264,8.704,1.247,10001.0,3.007000,7.008000,2.943000
output_gpu,inceptionv3,Fused_Add_Add_split_17757495218590450874_kernel,1001.0,882.255,885.044,872.6610000000001,1001.0,882.905,1610.5379999999998,873.748,10001.0,882.8470000000002,885.4870000000002,872.7189999999999,10001.0,882.8470000000002,885.4870000000002,872.7189999999999,10001.0,882.6560000000001,885.456,873.0089999999999,10001.0,864.827,1588.62,853.557,10001.0,879.448,882.456,877.24
output_gpu,inceptionv3,Fused_BiasAdd_17969653275057728157_kernel,1001.0,114.049,118.879,109.406,1001.0,83.727,86.91,83.07,10001.0,83.765,90.014,83.006,10001.0,83.765,90.014,83.006,10001.0,83.757,89.91799999999998,82.975,10001.0,83.786,90.047,83.00699999999999,10001.0,85.673,89.407,84.831
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14331974205342191976_kernel,1001.0,1.276,4.0,1.247,1001.0,1.27,1.632,1.247,10001.0,1.274,4.064,1.247,10001.0,1.274,4.064,1.247,10001.0,1.274,4.096,1.247,10001.0,1.274,12.832,1.247,10001.0,3.034000,3.712000,2.975000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_3646968818818513730_kernel,1001.0,1.262,3.711,1.247,1001.0,1.264,4.064,1.247,10001.0,1.26,3.776,1.247,10001.0,1.26,3.776,1.247,10001.0,1.26,3.776,1.247,10001.0,1.262,9.856,1.247,10001.0,3.083000,4.225000,3.007000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_16700083812227594731_kernel,1001.0,38.234,43.679,37.407,1001.0,38.936,800.47,36.319,10001.0,38.108,43.424,37.311,10001.0,38.108,43.424,37.311,10001.0,38.1,43.999,37.119,10001.0,36.743,42.496,36.096,10001.0,40.364,43.839000000000006,39.199999999999996
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14498515757243566400_kernel,1001.0,6.81,10.656,6.336,1001.0,6.803999999999999,8.543,6.4,10001.0,6.819,10.88,6.367999999999999,10001.0,6.819,10.88,6.367999999999999,10001.0,6.818,10.656,6.4,10001.0,5.522,9.888,4.832,10001.0,8.711000,10.944000,8.192000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_962809096906056009_kernel,1001.0,2.242,5.184,2.144,1001.0,2.216,5.5360000000000005,2.144,10001.0,2.236,5.247999999999999,2.112,10001.0,2.236,5.247999999999999,2.112,10001.0,2.237,5.248,2.112,10001.0,2.105,5.12,2.048,10001.0,4.410000,5.184000,4.288000
output_gpu,inceptionv3,Fused_ReduceSum_split_13063284910209712600_kernel,1001.0,73.738,85.24600000000001,72.128,1001.0,73.714,84.03,72.319,10001.0,70.05699999999999,81.278,69.503,10001.0,70.05699999999999,81.278,69.503,10001.0,70.127,81.63,69.535,10001.0,70.081,82.079,69.568,10001.0,72.321,82.24,71.839000
output_gpu,inceptionv3,Fused_Mul_Mul_Add_split_2146321145788151391_kernel,1001.0,1.386,3.36,1.375,1001.0,1.378,3.4560000000000004,1.344,10001.0,1.3869999999999998,9.408,1.375,10001.0,1.3869999999999998,9.408,1.375,10001.0,1.384,3.456,1.343,10001.0,1.376,3.424,1.343,10001.0,3.245000,3.968000,3.167000
output_gpu,inceptionv3,Fused_Sub_Exp_ReduceSum_split_15937433446338712464_kernel,1001.0,7.571000000000001,10.432,7.519,1001.0,7.537999999999999,10.496,7.456,10001.0,3.09,6.88,3.039,10001.0,3.09,6.88,3.039,10001.0,3.086,6.752,3.039,10001.0,3.089,6.687,3.039,10001.0,4.752000,5.792000,4.672000
output_gpu,inceptionv3,Fused_Mul_Add_split_8290303533966739766_kernel,1001.0,1.24,3.968,1.215,1001.0,1.236,4.192,1.215,10001.0,1.238,4.032,1.215,10001.0,1.238,4.032,1.215,10001.0,1.237,4.064,1.215,10001.0,1.233,4.032,1.215,10001.0,3.189000,3.809000,3.135000
output_gpu,inceptionv3,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.423,1.3430000000000002,1001.0,1.221,1.76,1.1840000000000002,10001.0,1.423,9.568,1.376,10001.0,1.423,9.568,1.376,10001.0,1.421,3.52,1.376,10001.0,1.242,1.824,1.215,10001.0,3.050000,3.743000,3.007000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_17592992270969361746_kernel,1001.0,24.592,30.496,23.904,1001.0,24.566,28.0,23.967,10001.0,24.562,30.207,23.808000000000003,10001.0,24.562,30.207,23.808000000000003,10001.0,24.528,30.495,23.871,10001.0,23.944,29.535,23.327,10001.0,27.005000000000003,30.656,26.048000000000002
output_gpu,inceptionv3,Fused_BiasAdd_18050471052323754735_kernel,1001.0,29.435,34.847,28.831,1001.0,27.373,31.456,26.88,10001.0,27.513,33.6,27.007,10001.0,27.513,33.6,27.007,10001.0,27.533,33.535000000000004,26.943,10001.0,27.506,31.935,26.880000000000003,10001.0,29.701,33.984,29.119
output_gpu,inceptionv3,Fused_ReduceSum_split_1218863806052220879_kernel,1001.0,31.702,40.096,30.495,1001.0,31.696,40.832,30.687,10001.0,25.335,37.568000000000005,24.831,10001.0,25.335,37.568000000000005,24.831,10001.0,25.323,37.023,24.8,10001.0,25.401,37.632,24.831,10001.0,27.666000,37.856,26.624000
output_gpu,inceptionv3,Fused_ReduceSum_split_11470169223351352124_kernel,1001.0,30.161,42.688,29.568,1001.0,30.3,41.663,29.631,10001.0,29.387,40.128,28.863000000000003,10001.0,29.387,40.128,28.863000000000003,10001.0,29.298,40.127,28.768,10001.0,29.293,40.544,28.768,10001.0,31.475999999999996,40.896,30.368000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14439200210288975919_kernel,1001.0,2.175,5.056,2.112,1001.0,2.15,2.752,2.08,10001.0,2.168,5.152,2.08,10001.0,2.168,5.152,2.08,10001.0,2.168,5.216,2.08,10001.0,2.101,9.856,2.047,10001.0,4.393000,5.248000,4.287000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_17413528510058875266_kernel,1001.0,1.4509999999999998,4.0,1.408,1001.0,1.444,1.663,1.408,10001.0,1.449,4.16,1.408,10001.0,1.449,4.16,1.408,10001.0,1.449,4.096,1.408,10001.0,1.444,4.064,1.407,10001.0,3.290000,3.712000,3.231000
output_gpu,inceptionv3,Fused_BiasAdd_2509283587668792037_kernel,1001.0,3760.993,3797.04,3749.136,1001.0,1362.058,2023.46,1355.021,10001.0,1360.639,1366.95,1348.007,10001.0,1360.639,1366.95,1348.007,10001.0,1360.579,1367.144,1346.312,10001.0,1360.963,2089.029,1347.694,10001.0,1357.858,1366.165,1345.461
output_gpu,inceptionv3,Fused_Mul_Add_split_5709332814874204631_kernel,1001.0,1.453,4.0,1.439,1001.0,1.4480000000000002,4.032,1.408,10001.0,1.449,4.096,1.408,10001.0,1.449,4.096,1.408,10001.0,1.45,4.064,1.439,10001.0,1.445,4.064,1.407,10001.0,3.293000,3.712000,3.231000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_18098645137375259155_kernel,1001.0,32.075,37.536,31.455,1001.0,32.086,35.263000000000005,31.423,10001.0,31.945,37.023,31.199,10001.0,31.945,37.023,31.199,10001.0,32.029,37.215,31.199,10001.0,30.955,36.896,30.336,10001.0,34.477000000000004,37.760000000000005,33.375
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_6127508607315633571_kernel,1001.0,1.24,3.712,1.215,1001.0,1.262,4.063,1.247,10001.0,1.237,3.808,1.215,10001.0,1.237,3.808,1.215,10001.0,1.237,3.776,1.215,10001.0,1.263,9.568,1.247,10001.0,3.077000,3.457000,3.008000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_801570573448254420_kernel,1001.0,2.61,5.696000000000001,2.528,1001.0,2.603,3.264,2.527,10001.0,2.603,5.76,2.496,10001.0,2.603,5.76,2.496,10001.0,2.605,5.888,2.496,10001.0,2.373,5.472,2.335,10001.0,4.809000,5.536000,4.672000
output_gpu,inceptionv3,Fused_BiasAdd_14556142645920472519_kernel,1001.0,1544.04,1572.236,1527.469,1001.0,998.601,1857.766,989.426,10001.0,997.89,1002.733,988.173,10001.0,997.89,1002.733,988.173,10001.0,997.885,1002.35,988.686,10001.0,998.162,1701.4499999999998,988.915,10001.0,997.6819999999999,1004.056,988.376
output_gpu,inceptionv3,Fused_Add_Add_Add_split_17864913665996881289_kernel,1001.0,698.325,702.135,692.952,1001.0,702.268,1452.172,698.135,10001.0,699.352,701.7460000000001,693.138,10001.0,699.352,701.7460000000001,693.138,10001.0,699.2560000000001,701.9720000000002,693.075,10001.0,690.52,695.638,688.567,10001.0,697.461,702.9060000000001,695.354
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14427111883498557498_kernel,1001.0,1.864,4.672,1.824,1001.0,1.829,2.3680000000000003,1.791,10001.0,1.861,4.767,1.823,10001.0,1.861,4.767,1.823,10001.0,1.859,4.768,1.823,10001.0,1.838,13.536,1.791,10001.0,3.910000,4.576000,3.839000
output_gpu,inceptionv3,Fused_ReduceSum_split_14718931859860011190_kernel,1001.0,70.825,81.15100000000001,68.51100000000001,1001.0,70.27,81.567,68.191,10001.0,66.271,77.502,65.791,10001.0,66.271,77.502,65.791,10001.0,66.20700000000001,77.374,65.822,10001.0,66.268,77.183,65.759,10001.0,68.37,77.567,67.871000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_4631655107095158369_kernel,1001.0,1.238,3.968,1.215,1001.0,1.243,4.128,1.215,10001.0,1.236,4.032,1.215,10001.0,1.236,4.032,1.215,10001.0,1.236,4.0,1.215,10001.0,1.238,4.0,1.215,10001.0,3.193000,3.776000,3.135000
output_gpu,inceptionv3,Fused_BiasAdd_1966064602787433544_kernel,1001.0,2605.112,2616.927,2588.479,1001.0,1937.797,1946.468,1917.221,10001.0,1938.656,1947.418,1917.531,10001.0,1938.656,1947.418,1917.531,10001.0,1938.707,1946.109,1920.413,10001.0,1939.328,2789.884,1931.175,10001.0,1933.9869999999999,1944.079,1914.799
output_gpu,inceptionv3,Fused_Mul_Add_fusion_4097863452421524268_kernel,1001.0,1.277,3.968,1.247,1001.0,1.287,2.112,1.247,10001.0,1.274,4.096,1.247,10001.0,1.274,4.096,1.247,10001.0,1.275,4.096,1.247,10001.0,1.272,4.192,1.247,10001.0,3.241000,3.807000,3.167000
output_gpu,inceptionv3,Fused_ReduceSum_split_5124766514423059357_kernel,1001.0,50.019,60.447,48.927,1001.0,49.97,58.335,48.864,10001.0,47.593,59.455,47.071000000000005,10001.0,47.593,59.455,47.071000000000005,10001.0,47.56,59.83900000000001,47.071,10001.0,47.562,60.0,47.039,10001.0,49.998,60.352000000000004,48.928000
output_gpu,inceptionv3,Fused_Reshape_DropoutGrad_fusion_6233726887306142852_kernel,1001.0,2.647,5.728,2.56,1001.0,2.659,5.888,2.56,10001.0,2.6460000000000004,9.631,2.528,10001.0,2.6460000000000004,9.631,2.528,10001.0,2.639,5.792,2.528,10001.0,2.387,5.632,2.336,10001.0,4.452000,5.088000,4.320000
output_gpu,inceptionv3,Fused_Add_Add_Add_split_429414724259211202_kernel,1001.0,1108.206,1112.562,1099.698,1001.0,1110.159,1809.063,1101.489,10001.0,1109.7520000000004,1112.49,1100.107,10001.0,1109.7520000000004,1112.49,1100.107,10001.0,1109.7220000000002,1113.004,1099.788,10001.0,1095.131,1821.992,1082.034,10001.0,1106.3419999999999,1114.486,1103.895
output_gpu,inceptionv3,Fused_Mul_Add_fusion_14281083276415648681_kernel,1001.0,1.99,5.023,1.951,1001.0,1.964,2.496,1.92,10001.0,1.988,4.959,1.951,10001.0,1.988,4.959,1.951,10001.0,1.988,4.928,1.951,10001.0,1.968,7.872,1.92,10001.0,3.778000,4.416000,3.264000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_17045368157683029199_kernel,1001.0,1.3019999999999998,4.032,1.279,1001.0,1.306,4.288,1.248,10001.0,1.3,4.032,1.279,10001.0,1.3,4.032,1.279,10001.0,1.3,4.128,1.279,10001.0,1.304,4.031,1.248,10001.0,3.055000,3.584000,3.007000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_8680289308457958643_kernel,1001.0,1.908,4.736000000000001,1.856,1001.0,1.875,2.3680000000000003,1.824,10001.0,1.904,4.96,1.855,10001.0,1.904,4.96,1.855,10001.0,1.903,4.832,1.856,10001.0,1.875,4.8,1.824,10001.0,3.964000,4.608000,3.872000
output_gpu,inceptionv3,Fused_BroadcastTo_inplace_assign_builder_187408248622839644_kernel,1001.0,1.224,2.9760000000000004,1.1840000000000002,1001.0,1.223,3.04,1.1840000000000002,10001.0,1.223,3.04,1.183,10001.0,1.223,3.04,1.183,10001.0,1.224,3.2,1.183,10001.0,1.221,9.76,1.183,10001.0,2.894000,3.456000,2.815000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_12508286546813904830_kernel,1001.0,2.24,5.28,2.144,1001.0,2.218,2.8160000000000003,2.143,10001.0,2.237,5.408,2.143,10001.0,2.237,5.408,2.143,10001.0,2.237,5.343,2.112,10001.0,2.108,5.088,2.079,10001.0,4.400000,7.136000,4.288000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_1120660004227899215_kernel,1001.0,4.354,8.224,4.159,1001.0,4.319,5.024,4.096,10001.0,4.354,7.776,4.127,10001.0,4.354,7.776,4.127,10001.0,4.353,7.712000000000001,4.096,10001.0,3.154,9.824,3.071,10001.0,6.287000,8.065000,6.144000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_15386966910619796443_kernel,1001.0,3.779,6.88,3.68,1001.0,3.776,4.6080000000000005,3.68,10001.0,3.768,7.167999999999999,3.648,10001.0,3.768,7.167999999999999,3.648,10001.0,3.767,7.232,3.648,10001.0,2.969,8.8,2.879,10001.0,6.047000,6.880000,5.919000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_4314068573008571148_kernel,1001.0,1.266,3.968,1.247,1001.0,1.275,4.672,1.247,10001.0,1.264,4.032,1.247,10001.0,1.264,4.032,1.247,10001.0,1.263,4.0,1.247,10001.0,1.269,4.032,1.247,10001.0,3.240000,3.777000,3.167000
output_gpu,inceptionv3,Fused_Mul_Add_split_16260080095868610887_kernel,1001.0,1.4509999999999998,4.032,1.439,1001.0,1.4480000000000002,4.064,1.4069999999999998,10001.0,1.45,4.096,1.439,10001.0,1.45,4.096,1.439,10001.0,1.45,4.096,1.439,10001.0,1.445,4.064,1.407,10001.0,3.285000,3.745000,3.231000
output_gpu,inceptionv3,Fused_Mul_Add_split_13064106691631487688_kernel,1001.0,1.435,4.0,1.4069999999999998,1001.0,1.43,4.032,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.432,4.064,1.4069999999999998,10001.0,1.432,4.096,1.407,10001.0,1.428,4.032,1.407,10001.0,3.282000,4.512000,3.200000
output_gpu,inceptionv3,Fused_Mul_Add_fusion_12240491293494244335_kernel,1001.0,1.305,4.0,1.279,1001.0,1.3159999999999998,4.6080000000000005,1.279,10001.0,1.3030000000000002,4.096,1.279,10001.0,1.3030000000000002,4.096,1.279,10001.0,1.303,4.255,1.279,10001.0,1.31,9.119,1.279,10001.0,3.275000,3.840000,3.200000
output_gpu,SSD,Fused_Cast_Transpose_fusion_212472623483218232_kernel,1001.0,1.308,4.0,1.279,1001.0,1.4469999999999998,2.3040000000000003,1.279,10001.0,1.272,4.288,1.247,10001.0,1.272,4.288,1.247,10001.0,1.271,4.064,1.247,10001.0,1.112,8.416,1.087,10001.0,3.045000,6.080000,2.975000
output_gpu,SSD,Fused_Reshape_Cast_Add_split_2164450695178392935_kernel,1001.0,1.616,4.384,1.599,1001.0,1.651,2.464,1.567,10001.0,1.598,4.704,1.567,10001.0,1.598,4.704,1.567,10001.0,1.597,4.448,1.567,10001.0,1.611,6.464,1.599,10001.0,3.643000,4.224000,3.583000
output_gpu,SSD,Fused_Add_fusion_17405102274872200439_kernel,1001.0,39.07,45.184,38.431,1001.0,39.063,42.623000000000005,38.367,10001.0,38.069,45.119,37.151,10001.0,38.069,45.119,37.151,10001.0,38.468,44.64,37.183,10001.0,42.231,48.576,38.88,10001.0,42.305,45.599000000000004,41.12
output_gpu,SSD,Fused_Cast_Add_split_13953401452310958221_kernel,1001.0,1.395,4.128,1.375,1001.0,1.501,2.3680000000000003,1.375,10001.0,1.4069999999999998,4.64,1.375,10001.0,1.4069999999999998,4.64,1.375,10001.0,1.405,4.288,1.375,10001.0,1.214,1.6,1.183,10001.0,3.162000,3.712000,3.103000
output_gpu,SSD,Fused_AddN_18092657967754340377_kernel,1001.0,1.83,4.64,1.791,1001.0,1.883,7.2,1.823,10001.0,1.826,4.928,1.791,10001.0,1.826,4.928,1.791,10001.0,1.827,4.64,1.791,10001.0,1.843,4.992,1.823,10001.0,3.603000,4.192000,3.551000
output_gpu,SSD,Fused_Add_fusion_16511057244010105501_kernel,1001.0,3.703,6.496,3.647,1001.0,3.719,4.224,3.679,10001.0,3.694,6.912000000000001,3.647,10001.0,3.694,6.912000000000001,3.647,10001.0,3.697,6.624,3.647,10001.0,4.819,5.44,4.576,10001.0,5.918000,7.072000,5.823000
output_gpu,SSD,Fused_Cast_Transpose_fusion_7122958448096152245_kernel,1001.0,1.254,3.968,1.216,1001.0,1.374,5.792000000000001,1.215,10001.0,1.232,4.256,1.215,10001.0,1.232,4.256,1.215,10001.0,1.233,4.0,1.215,10001.0,1.234,1.6,1.215,10001.0,3.179000,3.712000,3.103000
output_gpu,SSD,Fused_Cast_split_12477723516016570265_kernel,1001.0,1.662,4.416,1.631,1001.0,1.785,4.416,1.631,10001.0,1.658,4.736000000000001,1.631,10001.0,1.658,4.736000000000001,1.631,10001.0,1.657,4.479,1.631,10001.0,1.664,2.176,1.631,10001.0,3.680000,4.289000,3.615000
output_gpu,SSD,Fused_Cast_Add_split_12997724751741360116_kernel,1001.0,23.878,27.615,23.392,1001.0,20.363,26.751,19.904,10001.0,22.675,29.279,21.664,10001.0,22.675,29.279,21.664,10001.0,22.697,28.512,21.632,10001.0,22.667,27.168000000000003,21.696,10001.0,25.487,29.503999999999998,23.936
output_gpu,SSD,Fused_Add_fusion_12244787375137590621_kernel,1001.0,13.640999999999998,17.28,13.312,1001.0,13.635,14.976,13.247,10001.0,13.595999999999998,17.631999999999994,12.896,10001.0,13.595999999999998,17.631999999999994,12.896,10001.0,13.585,17.247,12.832,10001.0,15.433,17.151,14.528,10001.0,15.486000,17.184000,14.816000
output_gpu,SSD,Fused_Cast_split_3965286068359590110_kernel,1001.0,1.86,4.768,1.823,1001.0,1.878,2.3680000000000003,1.824,10001.0,1.855,5.056,1.823,10001.0,1.855,5.056,1.823,10001.0,1.856,4.736,1.823,10001.0,1.866,2.432,1.823,10001.0,3.928000,4.543000,3.840000
output_gpu,SSD,Fused_Cast_split_7081845129889074332_kernel,1001.0,1.4480000000000002,4.192,1.408,1001.0,1.668,3.424,1.408,10001.0,1.446,4.704,1.4069999999999998,10001.0,1.446,4.704,1.4069999999999998,10001.0,1.445,4.448,1.407,10001.0,1.48,2.336,1.408,10001.0,3.460000,4.192000,3.391000
output_gpu,SSD,Fused_Cast_split_3210942441325238138_kernel,1001.0,1.422,4.128,1.376,1001.0,1.533,2.464,1.376,10001.0,1.421,8.896,1.375,10001.0,1.421,8.896,1.375,10001.0,1.419,4.224,1.376,10001.0,1.426,1.823,1.407,10001.0,3.435000,4.096000,3.359000
output_gpu,SSD,Fused_Cast_split_14399860012028013244_kernel,1001.0,1.234,4.096,1.215,1001.0,1.329,3.935,1.215,10001.0,1.235,8.831,1.215,10001.0,1.235,8.831,1.215,10001.0,1.232,4.0,1.215,10001.0,1.232,1.632,1.215,10001.0,3.168000,3.712000,3.103000
output_gpu,SSD,Fused_Cast_ReduceSum_split_6780797392119313229_kernel,1001.0,4.923,7.967,4.864,1001.0,4.924,5.5360000000000005,4.864,10001.0,1.951,5.376,1.919,10001.0,1.951,5.376,1.919,10001.0,1.95,4.96,1.919,10001.0,1.952,2.72,1.919,10001.0,3.951000,4.703000,3.871000
output_gpu,SSD,Fused_Add_fusion_10374564141404430261_kernel,1001.0,9.475,12.608,9.184,1001.0,9.505,12.8,9.152,10001.0,9.464,12.832,9.152,10001.0,9.464,12.832,9.152,10001.0,9.468,12.575,9.088,10001.0,10.721,11.744,10.272,10001.0,11.119000,12.736000,10.688000
output_gpu,SSD,Fused_Cast_split_9823541193159402167_kernel,1001.0,1.268,4.16,1.247,1001.0,1.54,2.912,1.247,10001.0,1.265,4.352,1.247,10001.0,1.265,4.352,1.247,10001.0,1.264,4.032,1.247,10001.0,1.266,2.176,1.247,10001.0,3.240000,3.808000,3.167000
output_gpu,SSD,Fused_Cast_split_10912361475772358675_kernel,1001.0,1.399,4.128,1.375,1001.0,1.582,4.96,1.375,10001.0,1.396,7.968,1.375,10001.0,1.396,7.968,1.375,10001.0,1.396,4.192,1.375,10001.0,1.402,4.576,1.375,10001.0,3.175000,3.680000,3.103000
output_gpu,SSD,Fused_Add_split_12244787375137590621_kernel,1001.0,13.637,17.344,13.183,1001.0,13.636,15.424,13.215,10001.0,13.63,17.503999999999998,13.087,10001.0,13.63,17.503999999999998,13.087,10001.0,13.646,17.343999999999998,13.184,10001.0,15.172,16.832,14.4,10001.0,15.285000,17.056000,14.848000
output_gpu,SSD,Fused_Reshape_Cast_fusion_14247810158409349596_kernel,1001.0,34.669000000000004,37.632,34.623000000000005,1001.0,2.633,5.408,2.464,10001.0,2.376,5.568,2.303,10001.0,2.376,5.568,2.303,10001.0,2.378,5.28,2.303,10001.0,2.283,2.784,2.239,10001.0,4.486000,5.248000,4.352000
output_gpu,SSD,Fused_Cast_Transpose_fusion_753074960718664091_kernel,1001.0,1.283,4.32,1.248,1001.0,1.399,2.3680000000000003,1.248,10001.0,1.249,4.352,1.215,10001.0,1.249,4.352,1.215,10001.0,1.248,4.224,1.215,10001.0,1.249,2.656,1.215,10001.0,3.208000,3.873000,3.135000
output_gpu,SSD,Fused_Cast_Add_split_10279557501775696455_kernel,1001.0,1.812,4.6080000000000005,1.791,1001.0,1.822,5.088,1.76,10001.0,1.811,5.024,1.791,10001.0,1.811,5.024,1.791,10001.0,1.813,4.672,1.76,10001.0,1.82,2.24,1.791,10001.0,3.848000,4.544000,3.775000
output_gpu,SSD,Fused_Cast_ReduceSum_split_9843924338213189397_kernel,1001.0,63.964,66.87899999999999,63.839,1001.0,63.951,64.415,63.807,10001.0,6.41,11.68,6.303,10001.0,6.41,11.68,6.303,10001.0,6.898,11.550999999999998,6.559,10001.0,6.917,9.855,6.592,10001.0,9.146000,11.232000,7.712000
output_gpu,SSD,Fused_Cast_split_17598385420630154980_kernel,1001.0,4.37,7.584,4.287,1001.0,4.395,7.584,4.288,10001.0,4.365,7.872000000000001,4.256,10001.0,4.365,7.872000000000001,4.256,10001.0,4.367,7.647,4.256,10001.0,7.303,7.904,7.072,10001.0,6.696000,7.327000,6.527000
output_gpu,SSD,Fused_Cast_split_178665563794598391_kernel,1001.0,1.238,3.936,1.215,1001.0,1.605,7.007999999999999,1.215,10001.0,1.238,11.295,1.215,10001.0,1.238,11.295,1.215,10001.0,1.237,4.0,1.215,10001.0,1.128,8.896,1.087,10001.0,3.218000,3.744000,3.167000
output_gpu,SSD,Fused_Cast_Transpose_fusion_6280342227807530446_kernel,1001.0,1.287,4.192,1.247,1001.0,1.409,2.432,1.248,10001.0,1.27,4.288,1.247,10001.0,1.27,4.288,1.247,10001.0,1.269,4.096,1.247,10001.0,1.287,2.08,1.247,10001.0,2.998000,3.552000,2.943000
output_gpu,SSD,Fused_Cast_split_3945175514709277030_kernel,1001.0,1.273,3.968,1.247,1001.0,1.487,5.568,1.247,10001.0,1.271,4.448,1.247,10001.0,1.271,4.448,1.247,10001.0,1.27,4.384,1.247,10001.0,1.272,1.664,1.247,10001.0,3.235000,3.776000,3.167000
output_gpu,SSD,Fused_Cast_Add_split_2857516831293226427_kernel,1001.0,2.382,5.312,2.335,1001.0,1.911,2.4,1.824,10001.0,7.552,10.623,7.359,10001.0,7.552,10.623,7.359,10001.0,7.549,10.272,7.359,10001.0,6.507,7.008,6.368,10001.0,8.911000,10.304000,8.768000
output_gpu,SSD,Fused_Cast_split_9026640484114071927_kernel,1001.0,2.076,4.896,2.047,1001.0,2.081,4.992,2.047,10001.0,2.071,5.247999999999999,2.016,10001.0,2.071,5.247999999999999,2.016,10001.0,2.073,4.992,2.047,10001.0,2.081,2.656,2.047,10001.0,4.150000,4.800000,4.063000
output_gpu,SSD,Fused_Add_split_16511057244010105501_kernel,1001.0,3.701,6.559,3.648,1001.0,3.712,4.192,3.647,10001.0,3.699,6.912000000000001,3.647,10001.0,3.699,6.912000000000001,3.647,10001.0,3.697,6.624,3.647,10001.0,4.816,5.536,4.512,10001.0,5.409000,6.080000,5.343000
output_gpu,SSD,Fused_Reshape_Cast_Add_split_3487764587288445513_kernel,1001.0,1.246,3.968,1.215,1001.0,1.602,4.416,1.216,10001.0,1.244,4.32,1.215,10001.0,1.244,4.32,1.215,10001.0,1.243,4.064,1.215,10001.0,1.261,2.112,1.215,10001.0,3.203000,3.777000,3.135000
output_gpu,SSD,Fused_Cast_Transpose_fusion_14929232515520948565_kernel,1001.0,1.285,4.0,1.247,1001.0,1.371,2.432,1.247,10001.0,1.256,4.256,1.215,10001.0,1.256,4.256,1.215,10001.0,1.257,4.032,1.215,10001.0,1.095,2.848,1.055,10001.0,3.025000,3.616000,2.943000
output_gpu,SSD,Fused_Cast_ReduceSum_split_18161176003913693487_kernel,1001.0,8.845,11.904000000000002,8.799,1001.0,8.847999999999999,13.152,8.8,10001.0,2.512,6.72,2.495,10001.0,2.512,6.72,2.495,10001.0,2.511,6.464,2.464,10001.0,2.51,4.095,2.463,10001.0,4.540000,6.112000,4.479000
output_gpu,SSD,Fused_Cast_split_2912887675524452888_kernel,1001.0,1.251,4.096,1.216,1001.0,1.376,2.656,1.216,10001.0,1.251,8.992,1.215,10001.0,1.251,8.992,1.215,10001.0,1.249,4.032,1.215,10001.0,1.25,1.632,1.215,10001.0,3.202000,3.808000,3.135000
output_gpu,SSD,Fused_Cast_Add_split_6119339226905839188_kernel,1001.0,1.432,4.192,1.4069999999999998,1001.0,1.783,7.84,1.664,10001.0,1.4340000000000002,4.64,1.4069999999999998,10001.0,1.4340000000000002,4.64,1.4069999999999998,10001.0,1.434,4.288,1.407,10001.0,1.239,1.6,1.215,10001.0,3.431000,4.000000,3.359000
output_gpu,SSD,Fused_Cast_Transpose_fusion_8575083783266515900_kernel,1001.0,98.445,105.695,88.671,1001.0,81.363,83.711,80.51,10001.0,78.13499999999998,86.911,76.127,10001.0,78.13499999999998,86.911,76.127,10001.0,78.426,86.62299999999999,76.095,10001.0,80.46000000000001,768.2139999999999,76.863,10001.0,83.65899999999999,89.85600000000001,80.352
output_gpu,SSD,Fused_AddN_553642492727381987_kernel,1001.0,1.479,4.288,1.44,1001.0,1.629,9.247,1.44,10001.0,1.477,4.512,1.439,10001.0,1.477,4.512,1.439,10001.0,1.475,4.288,1.439,10001.0,1.484,9.728,1.44,10001.0,3.251000,3.776000,3.199000
output_gpu,SSD,Fused_Cast_split_2803885096574224037_kernel,1001.0,1.24,4.063,1.215,1001.0,1.461,5.12,1.215,10001.0,1.238,4.416,1.215,10001.0,1.238,4.416,1.215,10001.0,1.238,4.128,1.215,10001.0,1.24,8.992,1.215,10001.0,3.165000,3.808000,3.103000
output_gpu,SSD,Fused_Cast_split_6450499081507729353_kernel,1001.0,3.1260000000000003,5.92,3.071,1001.0,3.145,6.784,3.072,10001.0,3.12,6.464,3.071,10001.0,3.12,6.464,3.071,10001.0,3.127,6.047,3.071,10001.0,4.35,5.088,4.16,10001.0,5.310000,5.952000,5.183000
output_gpu,SSD,Fused_Cast_split_16152989510416617257_kernel,1001.0,1.852,4.671,1.823,1001.0,1.871,2.4,1.823,10001.0,1.848,4.992,1.823,10001.0,1.848,4.992,1.823,10001.0,1.849,4.704,1.823,10001.0,1.856,2.4,1.76,10001.0,3.920000,4.511000,3.839000
output_gpu,SSD,Fused_Cast_split_33705149289147855_kernel,1001.0,1.308,4.032,1.279,1001.0,1.492,3.872,1.279,10001.0,1.305,4.384,1.279,10001.0,1.305,4.384,1.279,10001.0,1.305,4.095,1.279,10001.0,1.308,1.76,1.279,10001.0,3.277000,3.840000,3.231000
output_gpu,SSD,Fused_Cast_Add_split_8008768012427129270_kernel,1001.0,2.577,5.44,2.496,1001.0,2.5780000000000003,3.04,2.528,10001.0,2.603,5.888,2.496,10001.0,2.603,5.888,2.496,10001.0,2.602,5.504,2.496,10001.0,2.25,2.624,2.175,10001.0,4.712000,5.472000,4.576000
output_gpu,SSD,Fused_Cast_split_18269366456249801380_kernel,1001.0,1.273,4.0,1.247,1001.0,1.375,4.64,1.247,10001.0,1.271,4.352,1.247,10001.0,1.271,4.352,1.247,10001.0,1.27,4.032,1.247,10001.0,1.16,9.408,1.119,10001.0,3.231000,3.808000,3.167000
output_gpu,SSD,Fused_Cast_Transpose_fusion_14906925221957087346_kernel,1001.0,1.382,4.16,1.344,1001.0,1.521,4.512,1.344,10001.0,1.265,4.32,1.247,10001.0,1.265,4.32,1.247,10001.0,1.264,4.064,1.247,10001.0,1.099,1.504,1.056,10001.0,3.037000,3.584000,2.975000
output_gpu,SSD,Fused_Add_RealDiv_ReduceSum_split_13169468148967519997_kernel,1001.0,6.085,8.158999999999999,6.047000000000001,1001.0,6.081,6.816,6.047000000000001,10001.0,6.082999999999999,8.224,6.047000000000001,10001.0,6.082999999999999,8.224,6.047000000000001,10001.0,6.081,8.224,6.047,10001.0,6.081,6.688,6.047,10001.0,7.420000,8.320000,7.359000
output_gpu,SSD,Fused_Cast_split_11352014730600909349_kernel,1001.0,1.587,4.352,1.567,1001.0,1.679,2.9760000000000004,1.567,10001.0,1.584,4.832,1.536,10001.0,1.584,4.832,1.536,10001.0,1.582,4.416,1.536,10001.0,1.587,2.048,1.536,10001.0,3.353000,3.904000,3.295000
output_gpu,SSD,Fused_AddN_2776756669849473319_kernel,1001.0,3.604,6.688,3.52,1001.0,3.635,7.744,3.552,10001.0,3.6010000000000004,9.408,3.519,10001.0,3.6010000000000004,9.408,3.519,10001.0,3.594,6.656,3.487,10001.0,5.161,5.664,4.864,10001.0,5.347000,6.207000,5.215000
output_gpu,SSD,Fused_Cast_ReduceSum_split_13074136462214727637_kernel,1001.0,2.519,5.856,2.495,1001.0,2.516,3.36,2.495,10001.0,2.531,5.696000000000001,2.495,10001.0,2.531,5.696000000000001,2.495,10001.0,1.572,4.352,1.535,10001.0,1.573,1.952,1.535,10001.0,3.568000,4.064000,3.488000
output_gpu,SSD,Fused_Cast_Transpose_fusion_7870260878470997528_kernel,1001.0,1.278,3.968,1.247,1001.0,1.403,2.464,1.247,10001.0,1.256,4.256,1.216,10001.0,1.256,4.256,1.216,10001.0,1.254,4.032,1.215,10001.0,1.257,1.632,1.247,10001.0,3.198000,3.808000,3.135000
output_gpu,SSD,Fused_Cast_split_1481888045297046288_kernel,1001.0,2.358,5.184,2.272,1001.0,2.364,5.184,2.272,10001.0,2.352,8.288,2.24,10001.0,2.352,8.288,2.24,10001.0,2.354,5.312,2.24,10001.0,3.415,9.376,3.168,10001.0,4.430000,5.120000,4.320000
output_gpu,SSD,Fused_Cast_split_1763336141012377207_kernel,1001.0,1.246,3.968,1.215,1001.0,1.526,5.728,1.215,10001.0,1.244,4.287,1.215,10001.0,1.244,4.287,1.215,10001.0,1.244,4.032,1.215,10001.0,1.135,9.248,1.088,10001.0,3.207000,3.744000,3.135000
output_gpu,SSD,Fused_Cast_split_13927405271513243232_kernel,1001.0,1.849,4.672,1.823,1001.0,1.877,5.408,1.823,10001.0,1.844,5.024,1.7919999999999998,10001.0,1.844,5.024,1.7919999999999998,10001.0,1.845,4.736,1.823,10001.0,2.476,2.976,2.336,10001.0,3.916000,4.576000,3.839000
output_gpu,SSD,Fused_Add_fusion_5852454803092463262_kernel,1001.0,4.747,7.84,4.672,1001.0,4.75,5.504,4.672,10001.0,4.739,8.416,4.671,10001.0,4.739,8.416,4.671,10001.0,4.742,7.872,4.671,10001.0,7.366,8.0,7.008,10001.0,6.434000,7.104000,6.335000
output_gpu,SSD,Fused_Cast_Add_split_14586347109830967124_kernel,1001.0,4.024,6.912000000000001,3.904,1001.0,3.631,5.92,3.519,10001.0,3.873,7.136,3.711,10001.0,3.873,7.136,3.711,10001.0,3.878,6.944,3.712,10001.0,3.32,4.0,3.168,10001.0,5.535000,6.400000,5.312000
output_gpu,SSD,Fused_Cast_split_13273846269547512762_kernel,1001.0,2.373,5.247999999999999,2.335,1001.0,2.442,6.144,2.336,10001.0,2.423,5.6,2.335,10001.0,2.423,5.6,2.335,10001.0,2.423,5.44,2.336,10001.0,3.592,6.432,3.392,10001.0,4.586000,5.376000,4.479000
output_gpu,SSD,Fused_Reshape_Reshape_Cast_ReduceSum_split_4757686232814340433_kernel,1001.0,2.385,5.247999999999999,2.367,1001.0,2.383,2.88,2.336,10001.0,1.732,4.864,1.695,10001.0,1.732,4.864,1.695,10001.0,1.57,4.512,1.535,10001.0,1.571,2.144,1.535,10001.0,3.548000,4.256000,3.487000
output_gpu,SSD,Fused_Cast_Transpose_fusion_16189785156585381433_kernel,1001.0,1.328,4.256,1.311,1001.0,1.4240000000000002,2.3040000000000003,1.215,10001.0,1.252,4.288,1.215,10001.0,1.252,4.288,1.215,10001.0,1.251,4.032,1.215,10001.0,1.088,1.408,1.055,10001.0,2.993000,4.128000,2.943000
output_gpu,SSD,Fused_Cast_Add_split_11426368480323851228_kernel,1001.0,1.916,4.832,1.824,1001.0,1.93,2.3680000000000003,1.824,10001.0,2.018,5.472,1.92,10001.0,2.018,5.472,1.92,10001.0,2.02,4.992,1.92,10001.0,2.022,2.784,1.92,10001.0,3.956000,5.057000,3.871000
output_gpu,SSD,Fused_Cast_split_1313930308032406742_kernel,1001.0,1.642,4.512,1.6,1001.0,1.663,4.384,1.6,10001.0,1.639,4.736000000000001,1.599,10001.0,1.639,4.736000000000001,1.599,10001.0,1.639,4.448,1.599,10001.0,1.645,2.24,1.599,10001.0,3.652000,4.288000,3.583000
output_gpu,SSD,Fused_Cast_ReduceSum_split_98575232153399791_kernel,1001.0,3.455,6.4,3.423,1001.0,3.467,6.752000000000001,3.423,10001.0,2.229,7.584,2.176,10001.0,2.229,7.584,2.176,10001.0,6.142,8.96,6.111,10001.0,5.352,5.792,5.311,10001.0,8.408000,8.896000,8.351000
output_gpu,SSD,Fused_Cast_Add_split_1277837232225260640_kernel,1001.0,1.2819999999999998,4.0,1.248,1001.0,1.75,2.336,1.695,10001.0,1.2819999999999998,4.32,1.247,10001.0,1.2819999999999998,4.32,1.247,10001.0,1.282,4.064,1.247,10001.0,1.284,2.4,1.247,10001.0,3.244000,6.592000,3.167000
output_gpu,SSD,Fused_Cast_split_12205459202570025747_kernel,1001.0,1.348,4.064,1.3119999999999998,1001.0,1.4469999999999998,4.832,1.3119999999999998,10001.0,1.345,4.544,1.311,10001.0,1.345,4.544,1.311,10001.0,1.345,4.128,1.311,10001.0,1.352,9.472,1.311,10001.0,3.366000,4.287000,3.295000
output_gpu,SSD,Fused_Cast_split_1769514575996428979_kernel,1001.0,1.245,4.192,1.215,1001.0,1.355,2.4,1.215,10001.0,1.246,8.256,1.215,10001.0,1.246,8.256,1.215,10001.0,1.243,4.256,1.215,10001.0,1.13,3.424,1.119,10001.0,3.164000,3.872000,3.103000
output_gpu,SSD,Fused_Cast_split_15050971751546069948_kernel,1001.0,2.119,5.088,2.079,1001.0,2.119,2.784,2.079,10001.0,2.113,5.6,2.079,10001.0,2.113,5.6,2.079,10001.0,2.114,5.024,2.079,10001.0,2.129,2.688,2.08,10001.0,4.195000,4.864000,4.127000
output_gpu,SSD,Fused_Cast_ReduceSum_split_4564102481086974292_kernel,1001.0,3.0780000000000003,6.24,3.039,1001.0,3.0780000000000003,3.84,3.039,10001.0,2.376,6.336,2.336,10001.0,2.376,6.336,2.336,10001.0,1.576,4.352,1.535,10001.0,1.578,1.952,1.536,10001.0,3.545000,4.064000,3.487000
output_gpu,SSD,Fused_AddN_1767755676200115549_kernel,1001.0,45.602,50.623000000000005,44.895,1001.0,45.626000000000005,51.615,44.703,10001.0,45.243,51.231,44.031000000000006,10001.0,45.243,51.231,44.031000000000006,10001.0,45.21,50.62300000000001,44.031000000000006,10001.0,44.535,49.727,43.167,10001.0,47.211999999999996,51.071,46.047
output_gpu,SSD,Fused_Cast_Transpose_fusion_2177940963275324887_kernel,1001.0,1.285,4.128,1.247,1001.0,1.456,5.568,1.215,10001.0,1.234,4.255,1.215,10001.0,1.234,4.255,1.215,10001.0,1.234,4.0,1.215,10001.0,1.074,1.472,1.055,10001.0,2.960000,3.552000,2.911000
output_gpu,SSD,Fused_Cast_split_376840548964046763_kernel,1001.0,2.5610000000000004,5.696000000000001,2.464,1001.0,2.5580000000000003,3.136,2.463,10001.0,2.555,5.792000000000001,2.463,10001.0,2.555,5.792000000000001,2.463,10001.0,2.556,5.44,2.463,10001.0,3.888,4.352,3.712,10001.0,4.659000,5.536000,4.543000
output_gpu,SSD,Fused_Cast_split_3018345947883863000_kernel,1001.0,1.394,3.232,1.375,1001.0,1.537,7.456,1.375,10001.0,1.393,3.2960000000000003,1.375,10001.0,1.393,3.2960000000000003,1.375,10001.0,1.393,3.328,1.375,10001.0,1.393,1.76,1.375,10001.0,3.260000,3.808000,3.199000
output_gpu,SSD,Fused_Cast_Transpose_fusion_2797657901835827674_kernel,1001.0,1.286,4.032,1.248,1001.0,1.414,4.352,1.248,10001.0,1.226,4.288,1.1840000000000002,10001.0,1.226,4.288,1.1840000000000002,10001.0,1.227,4.16,1.184,10001.0,1.23,9.056,1.184,10001.0,2.972000,3.647000,2.911000
output_gpu,SSD,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.3730000000000002,3.392,1.3430000000000002,1001.0,1.489,2.496,1.4069999999999998,10001.0,1.422,9.568,1.4069999999999998,10001.0,1.422,9.568,1.4069999999999998,10001.0,1.421,3.648,1.376,10001.0,1.229,1.792,1.215,10001.0,3.050000,3.872000,3.007000
output_gpu,SSD,Fused_Cast_split_11022413506020011990_kernel,1001.0,1.347,4.224,1.3119999999999998,1001.0,1.587,3.008,1.3119999999999998,10001.0,1.346,1.7280000000000002,1.311,10001.0,1.346,1.7280000000000002,1.311,10001.0,1.346,4.159,1.311,10001.0,1.352,1.728,1.312,10001.0,3.128000,4.095000,3.071000
output_gpu,SSD,Fused_BroadcastTo_RealDiv_split_12841157589641099628_kernel,1001.0,1.471,3.488,1.44,1001.0,1.62,4.192,1.439,10001.0,1.469,3.551,1.439,10001.0,1.469,3.551,1.439,10001.0,1.469,3.552,1.439,10001.0,1.468,1.984,1.439,10001.0,3.345000,4.128000,3.263000
output_gpu,SSD,Fused_Cast_split_11178500732451507533_kernel,1001.0,1.3259999999999998,4.224,1.311,1001.0,1.555,2.656,1.311,10001.0,1.3230000000000002,4.384,1.311,10001.0,1.3230000000000002,4.384,1.311,10001.0,1.323,4.16,1.311,10001.0,1.325,1.792,1.311,10001.0,3.067000,4.352000,3.007000
output_gpu,SSD,Fused_Cast_split_11809476934814772730_kernel,1001.0,1.584,4.512,1.567,1001.0,1.6130000000000002,2.272,1.567,10001.0,1.584,4.704,1.536,10001.0,1.584,4.704,1.536,10001.0,1.583,4.448,1.536,10001.0,1.587,2.176,1.567,10001.0,3.608000,4.224000,3.551000
output_gpu,SSD,Fused_Cast_split_8041686982855616850_kernel,1001.0,1.246,3.936,1.215,1001.0,1.33,4.224,1.215,10001.0,1.245,4.256,1.215,10001.0,1.245,4.256,1.215,10001.0,1.244,4.0,1.215,10001.0,1.25,1.984,1.215,10001.0,3.210000,3.744000,3.135000
output_gpu,SSD,Fused_Cast_split_8144596830580054810_kernel,1001.0,1.237,3.935,1.215,1001.0,1.347,4.512,1.215,10001.0,1.237,4.256,1.215,10001.0,1.237,4.256,1.215,10001.0,1.237,4.032,1.215,10001.0,1.239,9.632,1.215,10001.0,3.229000,3.744000,3.167000
output_gpu,SSD,Fused_Cast_split_2082569438736450855_kernel,1001.0,1.238,3.936,1.215,1001.0,1.3219999999999998,4.032,1.215,10001.0,1.237,4.288,1.215,10001.0,1.237,4.288,1.215,10001.0,1.237,4.032,1.215,10001.0,1.125,1.536,1.087,10001.0,3.216000,3.744000,3.167000
output_gpu,SSD,Fused_Cast_split_8567783561953811773_kernel,1001.0,1.394,3.392,1.375,1001.0,1.457,3.392,1.375,10001.0,1.392,3.4560000000000004,1.375,10001.0,1.392,3.4560000000000004,1.375,10001.0,1.392,3.456,1.375,10001.0,1.392,1.92,1.375,10001.0,3.262000,4.000000,3.199000
output_gpu,SSD,Fused_Reshape_Reshape_Cast_ReduceSum_split_10356419823469240956_kernel,1001.0,1.638,4.416,1.6,1001.0,1.702,3.232,1.599,10001.0,1.692,3.616,1.663,10001.0,1.692,3.616,1.663,10001.0,1.691,3.616,1.663,10001.0,1.692,8.736,1.663,10001.0,3.531000,4.128000,3.487000
output_gpu,SSD,Fused_Cast_split_16574106532409231701_kernel,1001.0,1.244,3.936,1.215,1001.0,1.381,4.448,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,4.256,1.215,10001.0,1.243,3.999,1.215,10001.0,1.245,12.672,1.215,10001.0,3.237000,3.775000,3.167000
output_gpu,SSD,Fused_Cast_split_7517801487255724429_kernel,1001.0,2.7,5.504,2.624,1001.0,2.7110000000000003,5.728,2.624,10001.0,2.695,5.983,2.592,10001.0,2.695,5.983,2.592,10001.0,2.695,5.632,2.592,10001.0,4.016,9.664,3.839,10001.0,4.837000,5.536000,4.703000
output_gpu,SSD,Fused_Cast_split_6311674897489810490_kernel,1001.0,1.294,4.0,1.248,1001.0,1.439,2.432,1.247,10001.0,1.291,4.32,1.248,10001.0,1.291,4.32,1.248,10001.0,1.291,4.096,1.247,10001.0,1.313,2.208,1.248,10001.0,3.271000,3.807000,3.199000
output_gpu,SSD,Fused_Add_split_5852454803092463262_kernel,1001.0,4.745,7.84,4.671,1001.0,4.774,8.16,4.703,10001.0,4.739,8.128,4.64,10001.0,4.739,8.128,4.64,10001.0,4.743,7.871,4.671,10001.0,6.324,9.504,5.984,10001.0,6.418000,7.424000,6.335000
output_gpu,SSD,Fused_Cast_split_2637722425996998704_kernel,1001.0,1.421,4.127,1.376,1001.0,1.6269999999999998,4.48,1.4069999999999998,10001.0,1.42,4.672,1.376,10001.0,1.42,4.672,1.376,10001.0,1.419,4.224,1.376,10001.0,1.424,1.856,1.407,10001.0,3.436000,4.000000,3.359000
output_gpu,SSD,Fused_Cast_ReduceSum_split_11134459247345513023_kernel,1001.0,3.092,4.96,3.071,1001.0,3.091,3.552,3.071,10001.0,3.091,3.52,3.071,10001.0,3.091,3.52,3.071,10001.0,3.091,5.024,3.071,10001.0,3.091,3.488,3.071,10001.0,5.036000,5.632000,4.960000
output_gpu,SSD,Fused_Cast_split_7191617525348129337_kernel,1001.0,1.849,4.864,1.823,1001.0,1.904,2.656,1.823,10001.0,1.844,4.992,1.823,10001.0,1.844,4.992,1.823,10001.0,1.845,4.768,1.792,10001.0,2.469,2.976,2.368,10001.0,3.914000,4.576000,3.839000
output_gpu,SSD,Fused_Add_fusion_2516020840714627897_kernel,1001.0,54.657,60.448,53.887,1001.0,54.659,57.599,53.951,10001.0,53.62900000000001,697.586,52.543,10001.0,53.62900000000001,697.586,52.543,10001.0,53.239,56.447,52.511,10001.0,57.238,64.191,54.688,10001.0,60.373000000000005,64.0,59.104
output_gpu,SSD,Fused_Cast_Transpose_fusion_9415032493456083933_kernel,1001.0,2.011,4.8,1.983,1001.0,1.355,3.967,1.216,10001.0,1.245,4.224,1.215,10001.0,1.245,4.224,1.215,10001.0,1.245,4.0,1.215,10001.0,1.248,4.256,1.215,10001.0,2.987000,3.488000,2.943000
output_gpu,SSD,Fused_Add_split_10374564141404430261_kernel,1001.0,9.487,12.448,9.216,1001.0,9.473,10.016,9.183,10001.0,9.465,13.088,9.12,10001.0,9.465,13.088,9.12,10001.0,9.465,12.415,9.152,10001.0,9.372,12.128,8.992,10001.0,11.120000,12.672000,10.623000
output_gpu,SSD,Fused_Cast_Transpose_fusion_2633634533823145655_kernel,1001.0,1.328,4.064,1.311,1001.0,1.513,3.968,1.216,10001.0,1.233,4.256,1.215,10001.0,1.233,4.256,1.215,10001.0,1.233,4.0,1.215,10001.0,1.234,1.632,1.215,10001.0,2.969000,3.520000,2.911000
output_gpu,SSD,Fused_Cast_Add_split_5156663532344680727_kernel,1001.0,10.612,13.728,10.368,1001.0,9.0,9.728,8.735,10001.0,9.614,13.152,8.64,10001.0,9.614,13.152,8.64,10001.0,9.682,13.023,9.343,10001.0,8.236,9.888,7.936,10001.0,11.297000,12.416000,10.881000
output_gpu,SSD,Fused_Cast_split_3723627380793875550_kernel,1001.0,1.324,4.064,1.311,1001.0,1.541,3.104,1.311,10001.0,1.3219999999999998,4.384,1.311,10001.0,1.3219999999999998,4.384,1.311,10001.0,1.323,4.128,1.311,10001.0,1.348,2.208,1.311,10001.0,3.295000,3.936000,3.231000
output_gpu,SSD,Fused_Cast_Transpose_fusion_4084597834872325192_kernel,1001.0,1.3869999999999998,4.224,1.375,1001.0,1.48,7.968,1.344,10001.0,1.325,8.863999999999999,1.311,10001.0,1.325,8.863999999999999,1.311,10001.0,1.323,4.192,1.311,10001.0,1.325,1.76,1.311,10001.0,3.086000,3.648000,3.039000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_8425661999072486419_kernel,1001.0,2.044,4.96,2.015,1001.0,1.369,4.032,1.279,10001.0,1.286,4.032,1.247,10001.0,1.286,4.032,1.247,10001.0,1.285,4.032,1.247,10001.0,1.27,2.24,1.215,10001.0,3.361000,3.936000,3.295000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_17565781733061768721_kernel,1001.0,1.624,4.352,1.599,1001.0,1.618,2.335,1.568,10001.0,1.5719999999999998,4.384,1.535,10001.0,1.5719999999999998,4.384,1.535,10001.0,1.571,4.384,1.535,10001.0,1.565,1.984,1.44,10001.0,3.589000,4.160000,3.519000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_8525654358023826627_kernel,1001.0,2.684,5.632000000000001,2.655,1001.0,2.6710000000000003,6.175,2.623,10001.0,2.684,5.696000000000001,2.655,10001.0,2.684,5.696000000000001,2.655,10001.0,2.684,5.76,2.655,10001.0,4.615,5.088,4.575,10001.0,7.304000,7.936000,7.231000
output_gpu,googlenet,Fused_Cast_split_8351751667762880221_kernel,1001.0,1.237,3.936,1.215,1001.0,1.609,7.327999999999999,1.216,10001.0,1.235,4.0,1.215,10001.0,1.235,4.0,1.215,10001.0,1.236,4.032,1.215,10001.0,1.248,1.599,1.215,10001.0,3.216000,3.776000,3.135000
output_gpu,googlenet,Fused_Cast_split_9097454469492922119_kernel,1001.0,1.245,3.936,1.215,1001.0,1.496,4.768,1.216,10001.0,1.244,4.032,1.215,10001.0,1.244,4.032,1.215,10001.0,1.243,4.063,1.215,10001.0,1.25,1.664,1.215,10001.0,3.211000,3.744000,3.135000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_16484906519284536972_kernel,1001.0,1.937,4.768,1.887,1001.0,2.01,3.2960000000000003,1.887,10001.0,1.768,4.5760000000000005,1.727,10001.0,1.768,4.5760000000000005,1.727,10001.0,1.772,9.599,1.727,10001.0,1.758,2.176,1.727,10001.0,3.784000,4.384000,3.743000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_12960650180575389005_kernel,1001.0,1.267,3.968,1.247,1001.0,1.425,4.032,1.247,10001.0,1.267,4.032,1.247,10001.0,1.267,4.032,1.247,10001.0,1.268,4.064,1.247,10001.0,1.266,1.632,1.247,10001.0,3.241000,4.032000,3.167000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_6221362681749431985_kernel,1001.0,2.988,6.176,2.912,1001.0,2.992,3.68,2.911,10001.0,2.797,6.24,2.719,10001.0,2.797,6.24,2.719,10001.0,2.8,6.016,2.719,10001.0,2.554,3.168,2.527,10001.0,4.943000,6.144000,4.832000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_16624597048639791099_kernel,1001.0,1.305,4.064,1.279,1001.0,1.515,2.528,1.279,10001.0,1.294,4.064,1.248,10001.0,1.294,4.064,1.248,10001.0,1.294,4.064,1.279,10001.0,1.308,1.728,1.248,10001.0,3.268000,3.840000,3.200000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_13300606549129559438_kernel,1001.0,2.652,5.472,2.432,1001.0,2.802,3.808,2.688,10001.0,2.793,5.888,2.687,10001.0,2.793,5.888,2.687,10001.0,2.793,6.08,2.687,10001.0,2.066,2.464,2.016,10001.0,4.126000,4.736000,4.063000
output_gpu,googlenet,Fused_Cast_BiasAdd_Cast_fusion_7331691450598000754_kernel,1001.0,1.311,4.192,1.279,1001.0,1.547,5.088,1.471,10001.0,1.501,4.543,1.471,10001.0,1.501,4.543,1.471,10001.0,1.5,4.384,1.471,10001.0,1.291,6.912,1.248,10001.0,3.218000,3.904000,3.167000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_985476699732197010_kernel,1001.0,2.301,5.312,2.271,1001.0,2.282,2.8480000000000003,2.239,10001.0,2.292,5.216,2.239,10001.0,2.292,5.216,2.239,10001.0,2.292,5.376,2.24,10001.0,3.508,4.096,3.36,10001.0,6.048000,6.752000,5.983000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_2464321920329467851_kernel,1001.0,1.315,4.064,1.279,1001.0,1.483,4.5760000000000005,1.279,10001.0,1.3119999999999998,4.224,1.279,10001.0,1.3119999999999998,4.224,1.279,10001.0,1.312,4.064,1.279,10001.0,1.315,1.696,1.279,10001.0,3.302000,3.872000,3.231000
output_gpu,googlenet,Fused_Cast_split_7934849603993508191_kernel,1001.0,1.784,4.544,1.759,1001.0,1.787,5.568,1.7280000000000002,10001.0,1.779,4.736000000000001,1.759,10001.0,1.779,4.736000000000001,1.759,10001.0,1.779,4.768,1.759,10001.0,1.769,2.432,1.728,10001.0,3.833000,4.480000,3.775000
output_gpu,googlenet,Fused_AddN_5020006482210682953_kernel,1001.0,172.89700000000002,179.134,169.502,1001.0,170.451,174.206,167.93400000000003,10001.0,170.813,179.74,167.965,10001.0,170.813,179.74,167.965,10001.0,170.91899999999995,179.357,167.773,10001.0,164.312,172.126,162.75,10001.0,173.859,179.774,170.62199999999999
output_gpu,googlenet,Fused_Cast_split_12477723516016570265_kernel,1001.0,1.662,4.448,1.631,1001.0,1.6869999999999998,3.712,1.6,10001.0,1.6569999999999998,4.512,1.631,10001.0,1.6569999999999998,4.512,1.631,10001.0,1.657,4.512,1.631,10001.0,1.642,2.016,1.599,10001.0,3.675000,4.448000,3.615000
output_gpu,googlenet,Fused_Cast_split_6432351970612290170_kernel,1001.0,2.217,5.12,2.143,1001.0,2.187,2.752,2.112,10001.0,2.212,5.247999999999999,2.111,10001.0,2.212,5.247999999999999,2.111,10001.0,2.212,5.184,2.112,10001.0,3.124,3.808,2.943,10001.0,4.321000,5.120000,4.191000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_14468487326990777567_kernel,1001.0,1.998,4.864,1.952,1001.0,2.068,4.672,1.951,10001.0,1.995,5.024,1.951,10001.0,1.995,5.024,1.951,10001.0,1.995,5.088,1.951,10001.0,1.65,2.015,1.599,10001.0,3.658000,4.224000,3.615000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_15960949892476975778_kernel,1001.0,1.476,4.352,1.439,1001.0,1.571,2.432,1.439,10001.0,1.473,4.384,1.439,10001.0,1.473,4.384,1.439,10001.0,1.473,4.416,1.439,10001.0,1.456,1.888,1.439,10001.0,3.480000,4.032000,3.423000
output_gpu,googlenet,Fused_Cast_split_16846804108256781062_kernel,1001.0,1.871,4.704,1.824,1001.0,1.909,4.736000000000001,1.7919999999999998,10001.0,1.866,4.768,1.823,10001.0,1.866,4.768,1.823,10001.0,1.866,4.992,1.823,10001.0,2.545,3.168,2.399,10001.0,3.931000,4.672000,3.871000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_15628204010922053266_kernel,1001.0,4.0680000000000005,7.36,3.967,1001.0,4.104,8.351,4.0,10001.0,4.08,7.327999999999999,3.999,10001.0,4.08,7.327999999999999,3.999,10001.0,4.083,7.232,3.968,10001.0,3.801,4.256,3.743,10001.0,5.532000,6.176000,5.471000
output_gpu,googlenet,Fused_Cast_split_3210942441325238138_kernel,1001.0,1.422,4.16,1.376,1001.0,1.504,4.192,1.375,10001.0,1.419,4.32,1.376,10001.0,1.419,4.32,1.376,10001.0,1.419,4.224,1.376,10001.0,1.429,3.392,1.375,10001.0,3.439000,3.967000,3.360000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_17307873758666687946_kernel,1001.0,4.59,7.999,4.448,1001.0,4.678,8.032,4.544,10001.0,4.673,7.808,4.512,10001.0,4.673,7.808,4.512,10001.0,4.672,7.712,4.543,10001.0,4.046,9.44,3.999,10001.0,6.237000,7.552000,6.175000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_13763939828782847103_kernel,1001.0,2.483,5.472,2.4,1001.0,2.489,5.5360000000000005,2.4,10001.0,2.393,5.504,2.24,10001.0,2.393,5.504,2.24,10001.0,2.392,5.376,2.272,10001.0,2.194,2.784,2.143,10001.0,4.507000,5.120000,4.351000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_17884474370717013435_kernel,1001.0,1.926,4.8,1.887,1001.0,1.466,6.816,1.28,10001.0,1.3090000000000002,9.312,1.279,10001.0,1.3090000000000002,9.312,1.279,10001.0,1.308,8.288,1.279,10001.0,1.321,4.064,1.279,10001.0,3.272000,3.840000,3.199000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_17524826685966761446_kernel,1001.0,2.514,5.472,2.4,1001.0,2.509,6.24,2.4,10001.0,2.506,5.5360000000000005,2.4,10001.0,2.506,5.5360000000000005,2.4,10001.0,2.507,5.568,2.4,10001.0,3.985,6.944,3.776,10001.0,5.613000,6.240000,5.343000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_10407209713875887145_kernel,1001.0,1.3330000000000002,4.064,1.311,1001.0,1.404,6.688,1.311,10001.0,1.317,4.416,1.279,10001.0,1.317,4.416,1.279,10001.0,1.318,4.288,1.28,10001.0,1.308,1.696,1.279,10001.0,3.311000,3.903000,3.232000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_10932316912452917541_kernel,1001.0,1.273,4.0,1.247,1001.0,1.5219999999999998,4.512,1.247,10001.0,1.271,4.064,1.247,10001.0,1.271,4.064,1.247,10001.0,1.273,9.28,1.247,10001.0,1.31,2.24,1.247,10001.0,3.232000,3.903000,3.167000
output_gpu,googlenet,Fused_Cast_split_12944131420817490762_kernel,1001.0,1.367,4.224,1.3430000000000002,1001.0,1.506,5.5360000000000005,1.3119999999999998,10001.0,1.366,4.16,1.3430000000000002,10001.0,1.366,4.16,1.3430000000000002,10001.0,1.365,4.16,1.343,10001.0,1.435,5.824,1.312,10001.0,3.382000,3.936000,3.327000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_15284429005205827759_kernel,1001.0,2.699,5.696000000000001,2.592,1001.0,2.702,5.76,2.592,10001.0,2.6910000000000003,9.76,2.56,10001.0,2.6910000000000003,9.76,2.56,10001.0,2.687,5.759,2.56,10001.0,4.792,5.184,4.608,10001.0,6.387000,7.072000,6.208000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_10569244855739474644_kernel,1001.0,2.5810000000000004,5.664,2.528,1001.0,2.418,3.232,2.3680000000000003,10001.0,2.432,5.28,2.399,10001.0,2.432,5.28,2.399,10001.0,2.437,5.28,2.399,10001.0,2.077,2.592,2.047,10001.0,3.844000,4.384000,3.775000
output_gpu,googlenet,Fused_BroadcastTo_inplace_assign_builder_2843991197081095484_kernel,1001.0,1.224,2.9760000000000004,1.1840000000000002,1001.0,1.414,3.744,1.1840000000000002,10001.0,1.223,3.072,1.183,10001.0,1.223,3.072,1.183,10001.0,1.225,3.072,1.183,10001.0,1.055,1.504,1.023,10001.0,2.887000,3.455000,2.815000
output_gpu,googlenet,Fused_Cast_split_3441259872626498741_kernel,1001.0,1.682,4.416,1.663,1001.0,1.711,8.158999999999999,1.631,10001.0,1.679,4.64,1.632,10001.0,1.679,4.64,1.632,10001.0,1.679,4.512,1.632,10001.0,1.658,2.048,1.631,10001.0,3.708000,4.320000,3.647000
output_gpu,googlenet,Fused_Cast_split_14240464581170454164_kernel,1001.0,1.307,4.064,1.279,1001.0,1.5119999999999998,9.536,1.279,10001.0,1.305,4.192,1.279,10001.0,1.305,4.192,1.279,10001.0,1.305,4.288,1.279,10001.0,1.301,1.696,1.279,10001.0,3.296000,4.672000,3.231000
output_gpu,googlenet,Fused_Reshape_Tile_RealDiv_Transpose_fusion_990392198561074619_kernel,1001.0,27.6,31.84,27.424,1001.0,33.015,34.751999999999995,32.128,10001.0,31.266,37.471,28.927,10001.0,31.266,37.471,28.927,10001.0,31.693,36.991,29.023000000000003,10001.0,44.596,47.167,42.847,10001.0,34.472,36.928000000000004,33.152
output_gpu,googlenet,Fused_Mul_Cast_Add_fusion_14735253733827235295_kernel,1001.0,1.275,4.064,1.247,1001.0,1.711,5.12,1.247,10001.0,1.272,4.064,1.247,10001.0,1.272,4.064,1.247,10001.0,1.273,4.063,1.247,10001.0,1.278,2.208,1.247,10001.0,3.245000,3.808000,3.167000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_3298769533213520252_kernel,1001.0,1.48,4.191,1.44,1001.0,1.62,2.912,1.439,10001.0,1.473,4.288,1.439,10001.0,1.473,4.288,1.439,10001.0,1.473,4.416,1.439,10001.0,1.463,1.824,1.439,10001.0,3.488000,4.096000,3.423000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_10307557682690487681_kernel,1001.0,1.3769999999999998,4.096,1.3430000000000002,1001.0,1.496,4.96,1.3430000000000002,10001.0,1.318,4.16,1.279,10001.0,1.318,4.16,1.279,10001.0,1.319,4.128,1.28,10001.0,1.317,1.824,1.279,10001.0,3.306000,4.032000,3.231000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_15058073767578379276_kernel,1001.0,1.426,4.192,1.4069999999999998,1001.0,1.588,3.008,1.28,10001.0,1.307,4.223,1.279,10001.0,1.307,4.223,1.279,10001.0,1.31,4.096,1.279,10001.0,1.351,18.368,1.279,10001.0,3.033000,3.872000,2.975000
output_gpu,googlenet,Fused_AddN_491961823752249310_kernel,1001.0,157.808,165.054,155.582,1001.0,156.183,159.678,154.526,10001.0,156.47799999999998,163.485,153.885,10001.0,156.47799999999998,163.485,153.885,10001.0,156.34799999999996,163.67700000000002,154.238,10001.0,153.89700000000002,913.908,151.48600000000002,10001.0,160.769,164.28699999999998,157.95000000000002
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_17564330104670872687_kernel,1001.0,3.26,6.656000000000001,3.2310000000000003,1001.0,3.236,6.56,3.199,10001.0,3.25,6.207999999999999,3.199,10001.0,3.25,6.207999999999999,3.199,10001.0,3.249,6.784,3.199,10001.0,6.401,7.2,6.367,10001.0,8.038000,8.768000,7.968000
output_gpu,googlenet,Fused_Transpose_split_12205482752262495869_kernel,1001.0,94.703,98.974,93.919,1001.0,92.073,97.087,91.423,10001.0,83.637,95.646,80.126,10001.0,83.637,95.646,80.126,10001.0,81.788,95.966,80.062,10001.0,60.759,64.12700000000001,58.303,10001.0,66.074,68.64,63.679
output_gpu,googlenet,Fused_Cast_split_6359548242704342688_kernel,1001.0,1.293,4.032,1.248,1001.0,1.635,7.391,1.248,10001.0,1.291,4.096,1.247,10001.0,1.291,4.096,1.247,10001.0,1.292,4.256,1.247,10001.0,1.287,1.695,1.247,10001.0,3.265000,3.840000,3.199000
output_gpu,googlenet,Fused_Cast_split_13872541284785479001_kernel,1001.0,1.3,4.0,1.279,1001.0,1.531,5.696000000000001,1.248,10001.0,1.298,4.128,1.248,10001.0,1.298,4.128,1.248,10001.0,1.298,4.127,1.279,10001.0,1.295,1.664,1.248,10001.0,3.318000,4.065000,3.263000
output_gpu,googlenet,Fused_Cast_split_5824536879097464366_kernel,1001.0,1.399,4.256,1.375,1001.0,1.461,4.288,1.3430000000000002,10001.0,1.396,4.192,1.375,10001.0,1.396,4.192,1.375,10001.0,1.396,4.352,1.375,10001.0,1.383,4.224,1.344,10001.0,3.412000,4.000000,3.359000
output_gpu,googlenet,Fused_Mul_Add_split_7973361985399515388_kernel,1001.0,1.404,3.424,1.375,1001.0,1.548,6.56,1.375,10001.0,1.402,3.4560000000000004,1.375,10001.0,1.402,3.4560000000000004,1.375,10001.0,1.402,8.992,1.375,10001.0,1.394,1.92,1.375,10001.0,3.262000,3.968000,3.199000
output_gpu,googlenet,Fused_AddN_2216432224023892243_kernel,1001.0,255.847,262.973,251.357,1001.0,256.557,1121.394,251.933,10001.0,253.393,261.115,250.075,10001.0,253.393,261.115,250.075,10001.0,253.41000000000005,262.875,249.947,10001.0,245.033,1027.954,242.141,10001.0,259.149,264.445,255.646
output_gpu,googlenet,Fused_Cast_Transpose_fusion_988569886767378385_kernel,1001.0,2.685,5.662999999999999,2.624,1001.0,2.426,7.52,2.3680000000000003,10001.0,2.43,5.472,2.399,10001.0,2.43,5.472,2.399,10001.0,2.431,5.472,2.399,10001.0,2.205,2.688,2.175,10001.0,4.278000,4.896000,4.191000
output_gpu,googlenet,Fused_Cast_split_3945175514709277030_kernel,1001.0,1.273,4.064,1.247,1001.0,1.4980000000000002,2.4,1.248,10001.0,1.271,4.224,1.247,10001.0,1.271,4.224,1.247,10001.0,1.271,4.032,1.247,10001.0,1.284,1.632,1.247,10001.0,3.234000,3.776000,3.167000
output_gpu,googlenet,Fused_Cast_split_863766618828009441_kernel,1001.0,1.954,4.96,1.919,1001.0,1.94,4.96,1.888,10001.0,1.948,5.023,1.919,10001.0,1.948,5.023,1.919,10001.0,1.947,4.864,1.919,10001.0,1.926,2.336,1.887,10001.0,4.019000,4.704000,3.967000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_13608001989381236150_kernel,1001.0,1.675,4.416,1.632,1001.0,1.75,4.8,1.663,10001.0,1.675,4.48,1.632,10001.0,1.675,4.48,1.632,10001.0,1.675,4.511,1.631,10001.0,1.439,1.76,1.407,10001.0,3.454000,4.000000,3.391000
output_gpu,googlenet,Fused_Cast_split_4750494009050628799_kernel,1001.0,1.247,3.968,1.215,1001.0,1.3430000000000002,2.3040000000000003,1.215,10001.0,1.245,4.0,1.215,10001.0,1.245,4.0,1.215,10001.0,1.245,4.128,1.215,10001.0,1.256,4.0,1.215,10001.0,3.212000,4.320000,3.135000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_17696775701570326413_kernel,1001.0,1.486,4.192,1.471,1001.0,1.629,6.336,1.44,10001.0,1.444,4.383,1.4069999999999998,10001.0,1.444,4.383,1.4069999999999998,10001.0,1.444,4.256,1.407,10001.0,1.425,1.824,1.407,10001.0,3.459000,4.032000,3.391000
output_gpu,googlenet,Fused_Cast_split_11131193624381640830_kernel,1001.0,1.853,4.672,1.824,1001.0,1.897,5.92,1.7919999999999998,10001.0,1.849,4.768,1.823,10001.0,1.849,4.768,1.823,10001.0,1.848,4.864,1.823,10001.0,1.824,2.304,1.791,10001.0,3.911000,4.608000,3.839000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_13762547040308587952_kernel,1001.0,1.618,4.32,1.599,1001.0,1.628,2.4,1.567,10001.0,1.617,4.6080000000000005,1.599,10001.0,1.617,4.6080000000000005,1.599,10001.0,1.619,9.504,1.599,10001.0,2.197,2.656,2.175,10001.0,4.279000,4.928000,4.223000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_11400608803066576619_kernel,1001.0,170.234,177.11700000000002,167.00599999999997,1001.0,155.476,159.23,153.726,10001.0,152.68200000000004,162.172,150.941,10001.0,152.68200000000004,162.172,150.941,10001.0,152.45,161.661,150.97299999999996,10001.0,189.043,990.227,185.438,10001.0,199.294,211.64600000000002,196.382
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_17905282520206714046_kernel,1001.0,8.302,11.2,7.968,1001.0,8.418,12.896,8.096,10001.0,8.435,12.32,8.0,10001.0,8.435,12.32,8.0,10001.0,8.435,12.352,8.0,10001.0,11.454,12.576,10.848,10001.0,13.055000,14.112000,12.352000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_10434588479252844865_kernel,1001.0,3.1010000000000004,6.272,2.9760000000000004,1001.0,3.125,7.68,3.072,10001.0,3.11,6.016,3.071,10001.0,3.11,6.016,3.071,10001.0,3.11,6.016,3.071,10001.0,2.518,2.976,2.464,10001.0,4.272000,4.896000,4.191000
output_gpu,googlenet,Fused_Cast_split_18269366456249801380_kernel,1001.0,1.273,4.128,1.247,1001.0,1.83,9.216,1.248,10001.0,1.27,4.191,1.247,10001.0,1.27,4.191,1.247,10001.0,1.27,4.032,1.247,10001.0,1.284,1.888,1.247,10001.0,3.233000,3.776000,3.167000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_1668508625948198013_kernel,1001.0,1.431,4.192,1.4069999999999998,1001.0,1.536,7.456,1.4069999999999998,10001.0,1.436,4.224,1.4069999999999998,10001.0,1.436,4.224,1.4069999999999998,10001.0,1.436,4.384,1.407,10001.0,1.429,1.792,1.376,10001.0,3.202000,3.712000,3.135000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_621728505791333861_kernel,1001.0,1.287,3.968,1.247,1001.0,1.496,9.376,1.248,10001.0,1.27,4.064,1.247,10001.0,1.27,4.064,1.247,10001.0,1.27,4.032,1.247,10001.0,1.276,1.76,1.247,10001.0,3.228000,3.777000,3.167000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_6181777797024894787_kernel,1001.0,2.5780000000000003,5.568,2.559,1001.0,2.557,3.072,2.527,10001.0,2.237,5.247999999999999,2.207,10001.0,2.237,5.247999999999999,2.207,10001.0,2.237,5.248,2.207,10001.0,2.215,2.752,2.175,10001.0,4.304000,5.055000,4.224000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_2731651175468614111_kernel,1001.0,1.331,4.192,1.311,1001.0,1.389,2.56,1.28,10001.0,1.315,9.376,1.279,10001.0,1.315,9.376,1.279,10001.0,1.312,4.256,1.279,10001.0,1.315,1.824,1.279,10001.0,3.299000,3.872000,3.231000
output_gpu,googlenet,Fused_Cast_fusion_859762677321748306_kernel,1001.0,1.256,3.936,1.216,1001.0,1.369,4.384,1.215,10001.0,1.254,4.16,1.215,10001.0,1.254,4.16,1.215,10001.0,1.254,4.032,1.215,10001.0,1.252,1.6,1.215,10001.0,3.222000,3.775000,3.167000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_17251857033887546907_kernel,1001.0,1.346,4.096,1.311,1001.0,1.385,4.288,1.247,10001.0,1.27,4.064,1.216,10001.0,1.27,4.064,1.216,10001.0,1.269,4.064,1.215,10001.0,1.275,1.599,1.247,10001.0,3.207000,3.777000,3.135000
output_gpu,googlenet,Fused_Cast_split_3239709790488242188_kernel,1001.0,1.803,4.672,1.759,1001.0,1.831,6.944,1.759,10001.0,1.798,4.64,1.759,10001.0,1.798,4.64,1.759,10001.0,1.797,4.736,1.759,10001.0,1.776,2.336,1.728,10001.0,3.856000,4.576000,3.775000
output_gpu,googlenet,Fused_AddN_12741515264242810734_kernel,1001.0,177.266,184.926,173.79,1001.0,176.87400000000002,182.43,173.75799999999998,10001.0,176.09199999999996,185.661,172.989,10001.0,176.09199999999996,185.661,172.989,10001.0,175.984,185.597,172.57299999999998,10001.0,169.314,942.516,167.422,10001.0,178.97299999999998,182.782,175.838
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_8347870740456626396_kernel,1001.0,4.152,7.263999999999999,4.095,1001.0,4.14,4.895,4.064,10001.0,4.132,7.455999999999999,4.063,10001.0,4.132,7.455999999999999,4.063,10001.0,4.131,7.36,4.032,10001.0,7.976,8.511,7.744,10001.0,10.364000,11.488000,10.048000
output_gpu,googlenet,Fused_Cast_split_12336856021567915391_kernel,1001.0,2.148,5.056,2.111,1001.0,2.141,12.736,2.08,10001.0,2.145,5.12,2.111,10001.0,2.145,5.12,2.111,10001.0,2.145,5.088,2.111,10001.0,2.118,2.656,2.079,10001.0,4.215000,4.928000,4.159000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_12050718480258958537_kernel,1001.0,1.796,4.704,1.759,1001.0,1.787,2.496,1.7280000000000002,10001.0,1.785,9.472,1.759,10001.0,1.785,9.472,1.759,10001.0,1.785,4.768,1.759,10001.0,2.577,3.168,2.559,10001.0,4.699000,5.792000,4.608000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_481195201934549875_kernel,1001.0,1.338,4.0,1.311,1001.0,1.394,4.352,1.311,10001.0,1.334,4.064,1.311,10001.0,1.334,4.064,1.311,10001.0,1.334,4.096,1.311,10001.0,1.316,1.728,1.279,10001.0,3.204000,3.776000,3.136000
output_gpu,googlenet,Fused_Cast_split_5317033797520455261_kernel,1001.0,1.3030000000000002,4.032,1.279,1001.0,1.469,2.496,1.279,10001.0,1.3019999999999998,4.256,1.279,10001.0,1.3019999999999998,4.256,1.279,10001.0,1.301,4.256,1.279,10001.0,1.303,4.096,1.279,10001.0,3.279000,3.904000,3.200000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_7208966610560314858_kernel,1001.0,1.537,4.256,1.503,1001.0,1.465,4.48,1.3119999999999998,10001.0,1.338,4.096,1.311,10001.0,1.338,4.096,1.311,10001.0,1.338,4.128,1.311,10001.0,1.959,5.056,1.92,10001.0,3.684000,4.256000,3.647000
output_gpu,googlenet,Fused_Cast_split_14909630329677733268_kernel,1001.0,1.599,4.384,1.567,1001.0,1.644,2.4,1.567,10001.0,1.596,4.416,1.567,10001.0,1.596,4.416,1.567,10001.0,1.595,4.416,1.567,10001.0,1.582,1.984,1.536,10001.0,3.611000,4.255000,3.551000
output_gpu,googlenet,Fused_Cast_Cast_ReduceSum_split_10289748916283163852_kernel,1001.0,2.849,6.72,2.815,1001.0,2.858,6.496,2.815,10001.0,13.194,16.224,12.319,10001.0,13.194,16.224,12.319,10001.0,4.004,6.784,3.967,10001.0,4.185,4.544,4.159,10001.0,5.604000,6.400000,5.535000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_13769394050066039635_kernel,1001.0,2.8310000000000004,6.112,2.783,1001.0,2.815,3.2960000000000003,2.783,10001.0,2.82,5.76,2.783,10001.0,2.82,5.76,2.783,10001.0,2.821,5.824,2.783,10001.0,5.133,5.792,5.087,10001.0,7.414000,8.161000,7.359000
output_gpu,googlenet,Fused_Cast_split_11151754459393097913_kernel,1001.0,1.422,4.128,1.376,1001.0,1.655,5.408,1.376,10001.0,1.419,4.352,1.376,10001.0,1.419,4.352,1.376,10001.0,1.419,4.224,1.376,10001.0,1.401,1.888,1.375,10001.0,3.428000,4.000000,3.359000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_1766553946511614565_kernel,1001.0,2.379,5.408,2.336,1001.0,2.372,2.944,2.335,10001.0,2.221,5.568,2.175,10001.0,2.221,5.568,2.175,10001.0,2.221,5.184,2.175,10001.0,2.199,2.72,2.048,10001.0,4.289000,4.960000,4.223000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_11493409533429099329_kernel,1001.0,1.881,4.704,1.855,1001.0,1.89,5.568,1.823,10001.0,1.877,9.311,1.824,10001.0,1.877,9.311,1.824,10001.0,1.874,8.96,1.824,10001.0,1.853,7.2,1.823,10001.0,3.930000,4.608000,3.840000
output_gpu,googlenet,Fused_AddN_2115116612883922903_kernel,1001.0,70.613,76.959,69.663,1001.0,69.45,73.791,68.831,10001.0,69.70700000000001,77.214,68.542,10001.0,69.70700000000001,77.214,68.542,10001.0,69.75099999999999,76.958,68.575,10001.0,68.54899999999999,825.59,67.167,10001.0,72.717,77.439,71.007
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_16123702687990528272_kernel,1001.0,1.38,4.16,1.3430000000000002,1001.0,1.48,2.464,1.3430000000000002,10001.0,1.325,4.096,1.311,10001.0,1.325,4.096,1.311,10001.0,1.326,4.288,1.311,10001.0,1.321,1.696,1.279,10001.0,3.342000,3.872000,3.263000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_13565419789138473644_kernel,1001.0,1.699,4.48,1.663,1001.0,1.813,3.744,1.632,10001.0,1.683,4.512,1.663,10001.0,1.683,4.512,1.663,10001.0,1.683,4.48,1.663,10001.0,1.663,2.176,1.631,10001.0,3.697000,4.288000,3.647000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_12792398503956406459_kernel,1001.0,1.546,4.288,1.504,1001.0,1.835,10.56,1.535,10001.0,1.557,9.312,1.535,10001.0,1.557,9.312,1.535,10001.0,1.556,4.64,1.535,10001.0,1.336,1.696,1.311,10001.0,3.335000,3.872000,3.263000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_10696948226895600933_kernel,1001.0,1.746,4.6080000000000005,1.696,1001.0,1.761,4.736000000000001,1.695,10001.0,1.741,4.672,1.696,10001.0,1.741,4.672,1.696,10001.0,1.741,4.544,1.696,10001.0,2.523,2.944,2.495,10001.0,4.615000,5.248000,4.543000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_8306947256418571765_kernel,1001.0,1.35,4.192,1.3119999999999998,1001.0,1.4269999999999998,4.096,1.311,10001.0,1.346,4.256,1.311,10001.0,1.346,4.256,1.311,10001.0,1.347,4.16,1.311,10001.0,1.582,2.144,1.536,10001.0,3.358000,5.855000,3.295000
output_gpu,googlenet,Fused_Cast_split_14878378123774812833_kernel,1001.0,2.135,5.024,2.111,1001.0,2.125,8.927999999999999,2.079,10001.0,2.132,5.056,2.079,10001.0,2.132,5.056,2.079,10001.0,2.13,5.024,2.08,10001.0,2.115,2.752,2.079,10001.0,4.210000,4.928000,4.159000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_15136315384912526565_kernel,1001.0,1.491,4.351,1.471,1001.0,1.5530000000000002,2.336,1.439,10001.0,1.481,4.351,1.44,10001.0,1.481,4.351,1.44,10001.0,1.481,4.352,1.439,10001.0,1.876,2.272,1.855,10001.0,3.675000,4.384000,3.615000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_18274578273128067791_kernel,1001.0,1.269,4.16,1.247,1001.0,1.414,2.464,1.216,10001.0,1.266,4.032,1.247,10001.0,1.266,4.032,1.247,10001.0,1.266,4.032,1.247,10001.0,1.336,6.879,1.247,10001.0,3.244000,3.744000,3.167000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_11774402490568796338_kernel,1001.0,2.281,5.376,2.175,1001.0,2.276,2.8160000000000003,2.144,10001.0,2.0,4.927,1.951,10001.0,2.0,4.927,1.951,10001.0,2.002,4.928,1.951,10001.0,2.001,2.591,1.951,10001.0,4.078000,4.736000,3.999000
output_gpu,googlenet,Fused_AddN_2045293737120921971_kernel,1001.0,344.386,351.73900000000003,341.372,1001.0,342.843,346.588,339.452,10001.0,343.041,351.833,339.67400000000004,10001.0,343.041,351.833,339.67400000000004,10001.0,342.956,351.45,339.898,10001.0,325.557,1072.018,322.94,10001.0,341.674,348.189,337.852
output_gpu,googlenet,Fused_Cast_Transpose_fusion_9911038317532081336_kernel,1001.0,5.345,8.512,5.184,1001.0,5.386,9.824,5.28,10001.0,5.364,8.576,5.247000000000001,10001.0,5.364,8.576,5.247000000000001,10001.0,5.365,8.64,5.248,10001.0,3.618,9.12,3.552,10001.0,5.832000,6.688000,5.728000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_1073140937459614419_kernel,1001.0,1.76,4.5760000000000005,1.727,1001.0,1.784,4.6080000000000005,1.696,10001.0,1.7480000000000002,4.6080000000000005,1.727,10001.0,1.7480000000000002,4.6080000000000005,1.727,10001.0,1.748,4.576,1.696,10001.0,1.726,3.968,1.695,10001.0,3.786000,4.416000,3.712000
output_gpu,googlenet,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.3730000000000002,3.36,1.3430000000000002,1001.0,1.349,2.624,1.1840000000000002,10001.0,1.42,3.488,1.4069999999999998,10001.0,1.42,3.488,1.4069999999999998,10001.0,1.421,3.488,1.376,10001.0,1.23,1.728,1.215,10001.0,3.054000,3.744000,3.007000
output_gpu,googlenet,Fused_Cast_split_17497149961194871015_kernel,1001.0,1.508,4.288,1.471,1001.0,1.582,2.3680000000000003,1.471,10001.0,1.505,4.384,1.471,10001.0,1.505,4.384,1.471,10001.0,1.505,4.448,1.471,10001.0,1.492,2.944,1.471,10001.0,3.526000,4.128000,3.455000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_6392901981805400676_kernel,1001.0,2.186,5.088,2.144,1001.0,2.305,6.176,2.143,10001.0,2.043,4.992,2.015,10001.0,2.043,4.992,2.015,10001.0,2.044,10.016,2.015,10001.0,2.02,2.528,1.983,10001.0,4.105000,4.832000,4.032000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_646512680943626843_kernel,1001.0,1.462,4.192,1.439,1001.0,1.588,2.3680000000000003,1.408,10001.0,1.408,4.16,1.375,10001.0,1.408,4.16,1.375,10001.0,1.408,4.192,1.375,10001.0,1.397,1.76,1.375,10001.0,3.432000,3.968000,3.359000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_15523860137076831978_kernel,1001.0,1.596,4.352,1.567,1001.0,1.5930000000000002,2.719,1.536,10001.0,1.525,4.512,1.503,10001.0,1.525,4.512,1.503,10001.0,1.525,4.352,1.503,10001.0,1.505,1.92,1.471,10001.0,3.536000,4.128000,3.487000
output_gpu,googlenet,Fused_Cast_split_2082569438736450855_kernel,1001.0,1.237,3.968,1.215,1001.0,1.41,2.496,1.216,10001.0,1.237,4.032,1.215,10001.0,1.237,4.032,1.215,10001.0,1.238,4.063,1.215,10001.0,1.253,1.728,1.215,10001.0,3.220000,3.712000,3.167000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_3389965200357903887_kernel,1001.0,1.991,4.928,1.952,1001.0,2.002,2.784,1.951,10001.0,1.946,4.864,1.888,10001.0,1.946,4.864,1.888,10001.0,1.945,4.896,1.919,10001.0,1.93,2.432,1.887,10001.0,3.999000,4.640000,3.935000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_15230960213609254388_kernel,1001.0,1.5519999999999998,4.416,1.535,1001.0,1.587,10.624,1.28,10001.0,1.3159999999999998,4.128,1.279,10001.0,1.3159999999999998,4.128,1.279,10001.0,1.316,4.256,1.279,10001.0,1.309,1.664,1.279,10001.0,3.080000,3.648000,3.007000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_5726143700767144363_kernel,1001.0,2.1,4.928,2.048,1001.0,2.08,5.5360000000000005,2.047,10001.0,1.883,4.736000000000001,1.855,10001.0,1.883,4.736000000000001,1.855,10001.0,1.883,4.768,1.855,10001.0,1.865,2.464,1.823,10001.0,3.945000,4.576000,3.871000
output_gpu,googlenet,Fused_Cast_split_6311674897489810490_kernel,1001.0,1.294,4.096,1.248,1001.0,1.55,9.248,1.248,10001.0,1.291,4.064,1.248,10001.0,1.291,4.064,1.248,10001.0,1.292,4.224,1.248,10001.0,1.292,4.192,1.247,10001.0,3.269000,3.808000,3.199000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_1735829833836606277_kernel,1001.0,1.725,4.832,1.695,1001.0,2.435,5.856,2.399,10001.0,2.438,5.44,2.399,10001.0,2.438,5.44,2.399,10001.0,2.439,5.472,2.399,10001.0,2.172,2.688,2.143,10001.0,4.235000,5.024000,4.159000
output_gpu,googlenet,Fused_Cast_Transpose_fusion_10392994258860098182_kernel,1001.0,2.6860000000000004,5.696000000000001,2.592,1001.0,2.688,5.856,2.624,10001.0,2.681,5.632000000000001,2.655,10001.0,2.681,5.632000000000001,2.655,10001.0,2.684,5.696,2.655,10001.0,2.257,2.656,2.208,10001.0,4.008000,4.704000,3.935000
output_gpu,googlenet,Fused_Cast_split_3353203469233290319_kernel,1001.0,2.658,5.408,2.56,1001.0,2.643,6.815,2.56,10001.0,2.652,5.6,2.528,10001.0,2.652,5.6,2.528,10001.0,2.65,5.504,2.528,10001.0,4.128,4.48,3.904,10001.0,4.777000,5.472000,4.639000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_13225314254291318122_kernel,1001.0,1.3130000000000002,4.0,1.279,1001.0,1.4409999999999998,4.768,1.28,10001.0,1.294,4.224,1.248,10001.0,1.294,4.224,1.248,10001.0,1.294,4.192,1.248,10001.0,1.307,1.888,1.248,10001.0,3.267000,3.840000,3.199000
output_gpu,googlenet,Fused_Mul_Reshape_Cast_Add_fusion_3174150719884109541_kernel,1001.0,1.813,4.6080000000000005,1.791,1001.0,1.918,4.288,1.76,10001.0,1.813,4.864,1.76,10001.0,1.813,4.864,1.76,10001.0,1.813,4.832,1.76,10001.0,1.803,2.24,1.759,10001.0,3.864000,4.480000,3.776000
output_gpu,googlenet,Fused_Mul_Transpose_Cast_Add_fusion_15000715337188127882_kernel,1001.0,3.5780000000000003,6.688,3.519,1001.0,3.557,4.192,3.487,10001.0,3.5610000000000004,6.688,3.487,10001.0,3.5610000000000004,6.688,3.487,10001.0,3.561,6.847,3.487,10001.0,6.835,9.888,6.655,10001.0,9.100000,10.432000,8.927000
output_gpu,googlenet,Fused_Cast_split_10704316376982652880_kernel,1001.0,1.73,4.64,1.696,1001.0,1.789,6.432,1.695,10001.0,1.726,4.6080000000000005,1.695,10001.0,1.726,4.6080000000000005,1.695,10001.0,1.726,4.64,1.695,10001.0,1.702,2.144,1.663,10001.0,3.756000,4.864000,3.711000
output_gpu,googlenet,Fused_Cast_split_16716776070374160166_kernel,1001.0,1.4680000000000002,4.192,1.439,1001.0,1.5019999999999998,2.24,1.408,10001.0,1.464,4.32,1.439,10001.0,1.464,4.32,1.439,10001.0,1.464,4.319,1.439,10001.0,1.445,1.856,1.407,10001.0,3.475000,4.065000,3.423000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_17882657875133424877_kernel,1001.0,1.264,3.968,1.247,1001.0,1.261,1.632,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,1.632,1.247,10001.0,3.217000,3.839000,3.167000
output_gpu,mobilenetv3,Fused_Mul_split_1342664299784032915_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.372,3.68,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.164,1.696,1.151,10001.0,3.008000,3.680000,2.943000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_1464885464384369972_kernel,1001.0,3.698,6.912000000000001,3.648,1001.0,3.708,6.752000000000001,3.648,10001.0,3.648,7.296,3.615,10001.0,3.648,7.296,3.615,10001.0,3.658,7.104,3.615,10001.0,3.644,4.64,3.584,10001.0,5.791000,7.008000,5.696000
output_gpu,mobilenetv3,Fused_Mul_fusion_16645970179795583297_kernel,1001.0,162.28,170.30200000000002,160.606,1001.0,145.424,152.253,144.605,10001.0,144.886,151.485,143.869,10001.0,144.886,151.485,143.869,10001.0,144.828,151.613,143.87,10001.0,140.097,143.614,139.134,10001.0,147.613,152.03,146.623
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_14861288715056940899_kernel,1001.0,1.233,3.936,1.215,1001.0,1.229,1.632,1.1840000000000002,10001.0,1.23,4.352,1.215,10001.0,1.23,4.352,1.215,10001.0,1.231,4.032,1.184,10001.0,1.23,1.6,1.215,10001.0,3.182000,3.712000,3.104000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_10748176867149085360_kernel,1001.0,2.481,5.599,2.399,1001.0,2.482,3.232,2.3680000000000003,10001.0,2.477,6.976,2.3680000000000003,10001.0,2.477,6.976,2.3680000000000003,10001.0,2.478,5.632,2.368,10001.0,2.332,8.064,2.303,10001.0,4.663000,5.440000,4.543000
output_gpu,mobilenetv3,Fused_Mul_fusion_13164648357494835259_kernel,1001.0,54.49,60.255,53.888000000000005,1001.0,54.484,60.672,53.951,10001.0,54.424,60.863,53.56699999999999,10001.0,54.424,60.863,53.56699999999999,10001.0,54.4,60.287,53.599,10001.0,54.354,58.303,53.663,10001.0,56.87,60.864000000000004,55.519
output_gpu,mobilenetv3,Fused_Add_fusion_8219214836775053601_kernel,1001.0,439.61800000000005,444.283,438.202,1001.0,439.631,441.72,438.265,10001.0,440.0580000000001,445.016,438.36,10001.0,440.0580000000001,445.016,438.36,10001.0,440.0580000000001,444.056,438.232,10001.0,426.102,1128.943,425.146,10001.0,440.998,444.764,439.804
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_13959723923376464580_kernel,1001.0,1.277,4.128,1.247,1001.0,1.287,9.216,1.247,10001.0,1.274,4.096,1.247,10001.0,1.274,4.096,1.247,10001.0,1.274,4.128,1.247,10001.0,1.286,4.384,1.247,10001.0,3.236000,3.807000,3.167000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_1883640624310466778_kernel,1001.0,5.428,8.191,5.375,1001.0,5.422999999999999,5.8870000000000005,5.375,10001.0,5.36,8.224,5.311,10001.0,5.36,8.224,5.311,10001.0,5.362,8.288,5.311,10001.0,5.361,5.792,5.311,10001.0,6.971000,7.520000,6.911000
output_gpu,mobilenetv3,Fused_RealDiv_Mul_Add_fusion_16910873801011469393_kernel,1001.0,76.234,82.175,75.487,1001.0,76.223,82.303,75.455,10001.0,76.09200000000001,81.88600000000001,75.199,10001.0,76.09200000000001,81.88600000000001,75.199,10001.0,76.079,81.69500000000002,75.13499999999998,10001.0,76.18700000000001,757.5889999999999,75.199,10001.0,78.794,82.175,77.727
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_3484297242631519392_kernel,1001.0,1.846,4.704,1.823,1001.0,1.844,2.4,1.823,10001.0,1.842,4.8,1.823,10001.0,1.842,4.8,1.823,10001.0,1.841,4.768,1.792,10001.0,1.672,2.112,1.631,10001.0,3.892000,4.512000,3.808000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_13740112193012234772_kernel,1001.0,1.264,4.096,1.247,1001.0,1.269,3.935,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.064,1.247,10001.0,1.261,3.136,1.247,10001.0,3.225000,3.777000,3.167000
output_gpu,mobilenetv3,Fused_Mul_ReduceSum_Add_split_64190903711288141_kernel,1001.0,5.492000000000001,8.256,5.44,1001.0,5.496,8.256,5.44,10001.0,5.417000000000001,8.32,5.375,10001.0,5.417000000000001,8.32,5.375,10001.0,5.413,8.352,5.344,10001.0,5.412,5.856,5.343,10001.0,6.984000,7.616000,6.912000
output_gpu,mobilenetv3,Fused_RealDiv_fusion_13076568714319119018_kernel,1001.0,73.117,79.327,72.575,1001.0,74.03,984.081,72.639,10001.0,72.983,79.711,72.318,10001.0,72.983,79.711,72.318,10001.0,72.97800000000001,79.22999999999998,72.351,10001.0,70.047,763.702,68.991,10001.0,75.196,79.295,74.49600000000001
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_2362238990757330009_kernel,1001.0,2.104,5.056,2.079,1001.0,2.106,4.96,2.079,10001.0,2.099,5.055,2.048,10001.0,2.099,5.055,2.048,10001.0,2.1,5.184,2.048,10001.0,2.102,2.72,2.048,10001.0,4.159000,4.896000,4.096000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_2666526202702675314_kernel,1001.0,1.457,4.032,1.439,1001.0,1.485,2.624,1.439,10001.0,1.454,9.088,1.439,10001.0,1.454,9.088,1.439,10001.0,1.454,4.128,1.439,10001.0,1.453,1.664,1.439,10001.0,3.285000,3.712000,3.231000
output_gpu,mobilenetv3,Fused_RealDiv_Mul_Add_fusion_34820967813013138_kernel,1001.0,212.778,219.006,211.453,1001.0,219.359,226.365,218.268,10001.0,219.417,226.204,218.075,10001.0,219.417,226.204,218.075,10001.0,219.415,225.404,218.109,10001.0,219.547,911.827,218.269,10001.0,221.15599999999998,225.054,219.934
output_gpu,mobilenetv3,Fused_BiasAdd_15211985742751576601_kernel,1001.0,1.264,3.935,1.247,1001.0,1.276,1.888,1.247,10001.0,1.262,4.0,1.247,10001.0,1.262,4.0,1.247,10001.0,1.261,4.0,1.247,10001.0,1.262,1.632,1.247,10001.0,3.236000,3.809000,3.167000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_93419331714277628_kernel,1001.0,1.332,4.096,1.311,1001.0,1.334,4.352,1.311,10001.0,1.329,4.256,1.311,10001.0,1.329,4.256,1.311,10001.0,1.331,9.6,1.311,10001.0,1.331,1.76,1.311,10001.0,3.306000,3.872000,3.231000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_16305455799062416657_kernel,1001.0,1.275,3.968,1.247,1001.0,1.279,3.968,1.247,10001.0,1.276,4.064,1.247,10001.0,1.276,4.064,1.247,10001.0,1.276,4.064,1.247,10001.0,1.099,1.44,1.087,10001.0,3.242000,3.776000,3.167000
output_gpu,mobilenetv3,Fused_Mul_fusion_7572473347176879639_kernel,1001.0,76.969,82.62299999999999,76.415,1001.0,76.844,80.255,76.255,10001.0,76.86399999999999,83.198,76.15899999999999,10001.0,76.86399999999999,83.198,76.15899999999999,10001.0,76.87100000000001,82.71799999999999,76.126,10001.0,76.831,80.319,76.19099999999999,10001.0,79.061,82.975,78.143
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_16904778296983262158_kernel,1001.0,2.272,5.312,2.239,1001.0,2.269,3.008,2.239,10001.0,2.22,5.44,2.176,10001.0,2.22,5.44,2.176,10001.0,2.221,5.44,2.207,10001.0,2.222,3.04,2.176,10001.0,3.901000,5.025000,3.839000
output_gpu,mobilenetv3,Fused_Add_fusion_10034880499458677202_kernel,1001.0,19.109,25.184,18.688,1001.0,19.115,25.504,18.656,10001.0,19.09,25.087000000000003,18.495,10001.0,19.09,25.087000000000003,18.495,10001.0,19.109,25.376,18.591,10001.0,18.661,22.656,18.143,10001.0,21.276000,25.440000,20.351000
output_gpu,mobilenetv3,Fused_BiasAdd_9325555480194444002_kernel,1001.0,1.69,4.607,1.663,1001.0,1.689,2.08,1.663,10001.0,1.69,4.512,1.663,10001.0,1.69,4.512,1.663,10001.0,1.691,4.544,1.663,10001.0,1.687,2.08,1.663,10001.0,3.700000,4.287000,3.647000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_15687251160003611935_kernel,1001.0,2.806,5.824,2.783,1001.0,2.803,3.36,2.783,10001.0,2.752,5.92,2.719,10001.0,2.752,5.92,2.719,10001.0,2.75,6.016,2.688,10001.0,2.752,3.456,2.719,10001.0,4.863000,6.687000,4.799000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_6098434791842584434_kernel,1001.0,1.268,4.128,1.247,1001.0,1.269,1.856,1.247,10001.0,1.266,4.096,1.247,10001.0,1.266,4.096,1.247,10001.0,1.266,4.288,1.247,10001.0,1.265,1.888,1.247,10001.0,3.221000,4.095000,3.167000
output_gpu,mobilenetv3,Fused_Add_fusion_7552099748779120068_kernel,1001.0,51.306,56.831,50.495,1001.0,51.288,52.383,50.495,10001.0,51.198,57.215,50.271,10001.0,51.198,57.215,50.271,10001.0,51.201,57.471,50.175,10001.0,49.288,52.799,48.704,10001.0,53.437,58.912,52.448
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_5369186430446809175_kernel,1001.0,1.281,4.0,1.247,1001.0,1.279,1.696,1.247,10001.0,1.279,4.096,1.247,10001.0,1.279,4.096,1.247,10001.0,1.279,4.256,1.247,10001.0,1.156,1.536,1.119,10001.0,3.251000,3.808000,3.199000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_17446330247610393356_kernel,1001.0,2.104,5.056,2.079,1001.0,2.102,2.688,2.048,10001.0,2.099,5.247999999999999,2.048,10001.0,2.099,5.247999999999999,2.048,10001.0,2.099,5.28,2.048,10001.0,2.101,2.688,2.048,10001.0,4.177000,4.865000,4.095000
output_gpu,mobilenetv3,Fused_BiasAdd_9289532139556865923_kernel,1001.0,2.64,5.568,2.592,1001.0,1.896,2.336,1.855,10001.0,1.895,6.144,1.824,10001.0,1.895,6.144,1.824,10001.0,1.896,4.992,1.855,10001.0,1.897,2.368,1.855,10001.0,3.977000,4.544000,3.872000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_3127792950821123225_kernel,1001.0,20.675,27.168000000000003,20.159,1001.0,20.679,24.896,19.999,10001.0,20.67,26.752,19.999,10001.0,20.67,26.752,19.999,10001.0,20.599,26.399,19.936,10001.0,20.167,23.904,19.487,10001.0,23.086,27.488,21.728
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_11392482609354099158_kernel,1001.0,2.424,5.632000000000001,2.399,1001.0,2.422,5.5360000000000005,2.399,10001.0,2.381,5.664,2.367,10001.0,2.381,5.664,2.367,10001.0,2.383,5.632,2.336,10001.0,2.388,3.232,2.367,10001.0,4.404000,5.376000,4.320000
output_gpu,mobilenetv3,Fused_RealDiv_fusion_14514309178616297254_kernel,1001.0,144.403,149.886,143.806,1001.0,145.161,859.4110000000001,143.741,10001.0,144.317,150.525,143.582,10001.0,144.317,150.525,143.582,10001.0,144.30499999999995,150.30200000000002,143.58100000000002,10001.0,138.641,878.9,137.662,10001.0,146.92,150.87900000000002,145.983
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_16734348320667906523_kernel,1001.0,1.3330000000000002,4.16,1.311,1001.0,1.332,1.824,1.311,10001.0,1.33,4.256,1.311,10001.0,1.33,4.256,1.311,10001.0,1.33,4.128,1.311,10001.0,1.146,1.792,1.119,10001.0,3.352000,3.936000,3.295000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_6698822718208382412_kernel,1001.0,2.484,5.504,2.399,1001.0,2.481,3.136,2.4,10001.0,2.477,5.632000000000001,2.3680000000000003,10001.0,2.477,5.632000000000001,2.3680000000000003,10001.0,2.478,5.664,2.368,10001.0,2.333,4.096,2.303,10001.0,4.641000,5.408000,4.511000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_9150516105612865020_kernel,1001.0,1.238,3.968,1.215,1001.0,1.239,1.6,1.215,10001.0,1.236,4.032,1.215,10001.0,1.236,4.032,1.215,10001.0,1.235,4.256,1.215,10001.0,1.235,1.728,1.215,10001.0,3.158000,3.712000,3.103000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_8587229255997270616_kernel,1001.0,1.324,4.096,1.28,1001.0,1.324,1.824,1.28,10001.0,1.3219999999999998,4.128,1.279,10001.0,1.3219999999999998,4.128,1.279,10001.0,1.321,4.288,1.279,10001.0,1.322,1.792,1.279,10001.0,3.305000,3.872000,3.231000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_9425642578143992248_kernel,1001.0,1.6869999999999998,4.5760000000000005,1.663,1001.0,1.685,2.176,1.663,10001.0,1.685,4.64,1.663,10001.0,1.685,4.64,1.663,10001.0,1.685,4.544,1.663,10001.0,1.689,2.144,1.663,10001.0,3.704000,4.320000,3.647000
output_gpu,mobilenetv3,Fused_Add_fusion_8427914361167663919_kernel,1001.0,70.699,76.416,70.111,1001.0,70.686,76.575,70.079,10001.0,70.62599999999998,76.319,69.854,10001.0,70.62599999999998,76.319,69.854,10001.0,70.617,76.222,69.91799999999999,10001.0,68.622,780.756,67.871,10001.0,72.80799999999999,76.224,72.03099999999999
output_gpu,mobilenetv3,Fused_ReduceMean_4445296748968516267_kernel,1001.0,67.22,78.271,66.56,1001.0,67.234,75.903,66.59100000000001,10001.0,67.46199999999999,79.51899999999999,66.334,10001.0,67.46199999999999,79.51899999999999,66.334,10001.0,67.325,79.711,66.239,10001.0,66.69999999999999,75.77499999999999,66.335,10001.0,70.04299999999999,79.038,69.343000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_12889760271667858406_kernel,1001.0,2.323,5.408,2.303,1001.0,2.322,3.072,2.303,10001.0,2.271,5.568,2.239,10001.0,2.271,5.568,2.239,10001.0,2.274,5.568,2.239,10001.0,2.272,3.264,2.239,10001.0,4.269000,5.375000,4.192000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_7919582682115872083_kernel,1001.0,1.689,4.448,1.663,1001.0,1.685,2.24,1.663,10001.0,1.685,4.512,1.663,10001.0,1.685,4.512,1.663,10001.0,1.685,4.672,1.663,10001.0,1.686,2.112,1.663,10001.0,3.704000,4.480000,3.647000
output_gpu,mobilenetv3,Fused_Add_fusion_17578593068380632040_kernel,1001.0,165.722,170.525,164.862,1001.0,166.50400000000002,948.496,164.925,10001.0,165.77200000000002,170.62,164.92499999999995,10001.0,165.77200000000002,170.62,164.92499999999995,10001.0,165.78099999999998,170.525,164.893,10001.0,161.136,164.573,160.382,10001.0,167.726,171.294,166.494
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_14954898335693053923_kernel,1001.0,1.248,3.936,1.215,1001.0,1.249,3.936,1.215,10001.0,1.246,4.0,1.215,10001.0,1.246,4.0,1.215,10001.0,1.246,4.032,1.215,10001.0,1.245,1.632,1.215,10001.0,3.176000,3.712000,3.103000
output_gpu,mobilenetv3,Fused_Mul_fusion_13733023487931009039_kernel,1001.0,99.273,105.855,98.463,1001.0,88.13799999999999,92.19,87.454,10001.0,87.72200000000001,94.398,86.94200000000002,10001.0,87.72200000000001,94.398,86.94200000000002,10001.0,87.743,94.558,86.91,10001.0,84.824,89.087,84.095,10001.0,90.77799999999999,95.168,89.727
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_14297157305962456785_kernel,1001.0,1.846,4.672,1.7919999999999998,1001.0,1.853,4.672,1.823,10001.0,1.843,8.8,1.7919999999999998,10001.0,1.843,8.8,1.7919999999999998,10001.0,1.842,4.928,1.791,10001.0,1.842,2.4,1.792,10001.0,3.892000,4.608000,3.807000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_11803917048075250266_kernel,1001.0,1.6569999999999998,4.416,1.631,1001.0,1.655,2.048,1.631,10001.0,1.654,4.544,1.631,10001.0,1.654,4.544,1.631,10001.0,1.654,4.608,1.631,10001.0,1.666,7.328,1.631,10001.0,3.661000,6.816000,3.615000
output_gpu,mobilenetv3,Fused_BiasAdd_12836414289050732582_kernel,1001.0,1.744,4.6080000000000005,1.727,1001.0,1.745,2.176,1.727,10001.0,1.742,4.64,1.696,10001.0,1.742,4.64,1.696,10001.0,1.743,4.608,1.696,10001.0,1.731,2.112,1.695,10001.0,3.509000,4.064000,3.455000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_10884096003968592211_kernel,1001.0,2.006,4.928,1.952,1001.0,2.003,2.56,1.952,10001.0,2.002,4.992,1.952,10001.0,2.002,4.992,1.952,10001.0,2.001,5.056,1.952,10001.0,2.054,2.592,2.015,10001.0,4.099000,4.863000,4.031000
output_gpu,mobilenetv3,Fused_Add_fusion_3417841126287807577_kernel,1001.0,36.506,42.879,36.031,1001.0,36.521,44.607,35.872,10001.0,36.432,42.464,35.712,10001.0,36.432,42.464,35.712,10001.0,36.409,43.391000000000005,35.80699999999999,10001.0,35.156,39.487,34.56,10001.0,38.591,43.166999999999994,37.503
output_gpu,mobilenetv3,Fused_BiasAdd_17648792637236587805_kernel,1001.0,2.432,5.343999999999999,2.399,1001.0,1.878,4.736000000000001,1.855,10001.0,1.87,4.8,1.823,10001.0,1.87,4.8,1.823,10001.0,1.87,4.704,1.824,10001.0,1.871,2.24,1.824,10001.0,3.934000,4.479000,3.840000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_13883274358138881903_kernel,1001.0,2.94,5.92,2.911,1001.0,2.945,6.144,2.911,10001.0,2.927,9.632,2.879,10001.0,2.927,9.632,2.879,10001.0,2.925,6.367,2.879,10001.0,2.927,4.096,2.88,10001.0,5.003000,6.016000,4.928000
output_gpu,mobilenetv3,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.183,1001.0,1.222,1.632,1.1840000000000002,10001.0,1.223,3.2,1.183,10001.0,1.223,3.2,1.183,10001.0,1.223,3.072,1.183,10001.0,1.207,1.504,1.183,10001.0,3.062000,3.585000,3.007000
output_gpu,mobilenetv3,Fused_RealDiv_fusion_9044847223339210436_kernel,1001.0,87.552,93.246,87.007,1001.0,87.55,93.727,87.039,10001.0,87.37599999999998,93.214,86.68700000000001,10001.0,87.37599999999998,93.214,86.68700000000001,10001.0,87.35,93.214,86.68599999999998,10001.0,83.848,87.583,83.07100000000001,10001.0,89.556,93.47,88.799
output_gpu,mobilenetv3,Fused_Mul_fusion_15419329260743817884_kernel,1001.0,98.726,104.895,97.951,1001.0,88.12700000000001,91.902,87.42200000000001,10001.0,87.84,94.334,86.97399999999998,10001.0,87.84,94.334,86.97399999999998,10001.0,87.87,94.558,87.102,10001.0,84.95800000000001,89.375,84.095,10001.0,90.097,94.336,89.247
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_5833392276662101320_kernel,1001.0,1.523,4.288,1.503,1001.0,1.52,1.92,1.503,10001.0,1.519,4.416,1.472,10001.0,1.519,4.416,1.472,10001.0,1.519,4.736,1.472,10001.0,1.375,1.792,1.343,10001.0,3.524000,4.128000,3.455000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_10711044139108437406_kernel,1001.0,1.262,4.16,1.247,1001.0,1.259,1.6,1.247,10001.0,1.259,4.192,1.247,10001.0,1.259,4.192,1.247,10001.0,1.259,4.064,1.247,10001.0,1.294,5.184,1.247,10001.0,3.215000,3.808000,3.135000
output_gpu,mobilenetv3,Fused_Reshape_Sub_Exp_ReduceSum_split_4086276757985005905_kernel,1001.0,7.581,10.592,7.519,1001.0,7.579,8.192,7.519,10001.0,3.11,6.88,3.071,10001.0,3.11,6.88,3.071,10001.0,3.113,6.944,3.071,10001.0,2.669,4.0,2.624,10001.0,4.799000,5.984000,4.735000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_9776070476940196447_kernel,1001.0,1.276,4.064,1.247,1001.0,1.3019999999999998,9.6,1.247,10001.0,1.273,4.256,1.247,10001.0,1.273,4.256,1.247,10001.0,1.276,4.096,1.247,10001.0,1.274,1.632,1.247,10001.0,3.239000,3.808000,3.167000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_2252472690218872289_kernel,1001.0,1.268,3.936,1.247,1001.0,1.298,4.192,1.247,10001.0,1.267,4.192,1.247,10001.0,1.267,4.192,1.247,10001.0,1.267,4.032,1.247,10001.0,1.267,1.632,1.247,10001.0,3.232000,3.903000,3.167000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_9747693032472106124_kernel,1001.0,1.3230000000000002,4.032,1.28,1001.0,1.338,2.432,1.28,10001.0,1.32,4.128,1.279,10001.0,1.32,4.128,1.279,10001.0,1.32,4.192,1.28,10001.0,1.32,1.696,1.28,10001.0,3.320000,3.904000,3.232000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_18425500621849517783_kernel,1001.0,1.263,3.935,1.247,1001.0,1.29,4.704,1.247,10001.0,1.262,4.0,1.247,10001.0,1.262,4.0,1.247,10001.0,1.262,4.032,1.247,10001.0,1.303,11.264,1.247,10001.0,3.235000,3.808000,3.167000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_1183499294141441093_kernel,1001.0,2.081,5.152,2.047,1001.0,2.077,2.592,2.047,10001.0,2.076,5.247999999999999,2.016,10001.0,2.076,5.247999999999999,2.016,10001.0,2.075,5.216,2.016,10001.0,2.08,2.656,2.047,10001.0,4.151000,4.865000,4.064000
output_gpu,mobilenetv3,Fused_Mul_fusion_10004199505558456955_kernel,1001.0,158.441,164.926,157.694,1001.0,145.536,149.565,144.83,10001.0,144.93800000000005,151.613,143.965,10001.0,144.93800000000005,151.613,143.965,10001.0,144.901,152.126,143.934,10001.0,140.423,849.7479999999999,139.262,10001.0,147.53900000000002,152.062,146.654
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_4585897219534964580_kernel,1001.0,1.269,4.32,1.247,1001.0,1.27,1.7919999999999998,1.247,10001.0,1.266,4.288,1.247,10001.0,1.266,4.288,1.247,10001.0,1.266,4.128,1.247,10001.0,1.144,1.504,1.119,10001.0,3.209000,3.935000,3.167000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_4177582663050197895_kernel,1001.0,2.592,5.664,2.559,1001.0,2.589,3.232,2.559,10001.0,2.54,6.112,2.496,10001.0,2.54,6.112,2.496,10001.0,2.538,5.824,2.495,10001.0,2.54,3.36,2.496,10001.0,4.201000,5.472000,3.424000
output_gpu,mobilenetv3,Fused_ReduceMean_728894389712972095_kernel,1001.0,41.792,54.687,41.247,1001.0,41.78,50.175,41.183,10001.0,41.795,52.735,40.8,10001.0,41.795,52.735,40.8,10001.0,42.24100000000001,53.887,40.927,10001.0,41.363,747.574,40.896,10001.0,44.265,53.631,43.711000
output_gpu,mobilenetv3,Fused_BiasAdd_14101026560331543454_kernel,1001.0,1.263,3.968,1.247,1001.0,1.265,1.664,1.247,10001.0,1.261,4.032,1.216,10001.0,1.261,4.032,1.216,10001.0,1.262,4.064,1.247,10001.0,1.261,1.632,1.216,10001.0,3.020000,7.136000,2.943000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_7280306889558148606_kernel,1001.0,1.327,4.096,1.311,1001.0,1.35,8.512,1.311,10001.0,1.325,4.352,1.311,10001.0,1.325,4.352,1.311,10001.0,1.325,4.128,1.311,10001.0,1.325,1.728,1.311,10001.0,3.324000,3.904000,3.263000
output_gpu,mobilenetv3,Fused_RealDiv_Mul_Add_fusion_12809191724530098095_kernel,1001.0,298.135,303.613,296.92400000000004,1001.0,306.10400000000004,312.827,304.923,10001.0,306.241,312.378,305.0180000000001,10001.0,306.241,312.378,305.0180000000001,10001.0,306.242,313.05,304.954,10001.0,306.523,1033.585,304.956,10001.0,307.895,312.15700000000004,306.398
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_6573896349788424007_kernel,1001.0,1.264,3.936,1.247,1001.0,1.266,1.76,1.247,10001.0,1.262,4.0,1.247,10001.0,1.262,4.0,1.247,10001.0,1.264,9.344,1.247,10001.0,1.262,1.632,1.247,10001.0,3.233000,3.807000,3.167000
output_gpu,mobilenetv3,Fused_BiasAdd_15121119845190207968_kernel,1001.0,2.597,5.504,2.528,1001.0,2.003,9.44,1.952,10001.0,1.994,4.864,1.951,10001.0,1.994,4.864,1.951,10001.0,1.994,4.992,1.951,10001.0,1.994,2.432,1.952,10001.0,4.040000,4.672000,3.999000
output_gpu,mobilenetv3,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.3730000000000002,3.424,1.3430000000000002,1001.0,1.422,1.952,1.4069999999999998,10001.0,1.421,3.487,1.376,10001.0,1.421,3.487,1.376,10001.0,1.42,3.488,1.407,10001.0,1.425,1.952,1.407,10001.0,3.062000,3.808000,3.007000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_11466898865907755338_kernel,1001.0,1.396,4.288,1.375,1001.0,1.393,1.7919999999999998,1.375,10001.0,1.393,4.224,1.375,10001.0,1.393,4.224,1.375,10001.0,1.393,4.352,1.375,10001.0,1.393,1.792,1.375,10001.0,3.411000,4.000000,3.329000
output_gpu,mobilenetv3,Fused_BiasAdd_17554781319156776008_kernel,1001.0,2.257,5.056,2.208,1001.0,2.255,2.688,2.208,10001.0,2.253,5.12,2.207,10001.0,2.253,5.12,2.207,10001.0,2.253,5.312,2.207,10001.0,2.254,2.784,2.207,10001.0,4.353000,4.928000,4.287000
output_gpu,mobilenetv3,Fused_Mul_fusion_11667805904163872099_kernel,1001.0,151.393,156.958,150.622,1001.0,158.167,161.725,156.094,10001.0,158.329,164.701,155.645,10001.0,158.329,164.701,155.645,10001.0,158.336,163.86900000000003,155.262,10001.0,158.537,885.298,156.285,10001.0,159.78300000000002,164.47899999999998,157.599
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_11087746713115392759_kernel,1001.0,1.293,4.192,1.248,1001.0,1.297,4.064,1.248,10001.0,1.29,4.159,1.247,10001.0,1.29,4.159,1.247,10001.0,1.29,4.096,1.247,10001.0,1.29,1.696,1.247,10001.0,3.254000,3.968000,3.168000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_552047198067261288_kernel,1001.0,1.242,3.968,1.215,1001.0,1.24,1.632,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.032,1.215,10001.0,1.12,1.472,1.087,10001.0,3.172000,3.712000,3.103000
output_gpu,mobilenetv3,Fused_RealDiv_Mul_Add_fusion_8732084563362980160_kernel,1001.0,108.41,113.631,107.327,1001.0,108.417,114.078,107.422,10001.0,108.253,114.782,107.102,10001.0,108.253,114.782,107.102,10001.0,108.255,113.662,107.07,10001.0,108.211,111.583,107.007,10001.0,110.15700000000001,114.463,109.119
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_16400814057734860311_kernel,1001.0,2.005,4.928,1.952,1001.0,2.003,2.688,1.952,10001.0,2.001,5.024,1.952,10001.0,2.001,5.024,1.952,10001.0,2.0,5.28,1.951,10001.0,1.765,2.336,1.727,10001.0,4.102000,4.928000,4.032000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_15014316899863771625_kernel,1001.0,20.0,25.695,19.391,1001.0,19.98,25.983,19.423,10001.0,19.959,25.791,19.296,10001.0,19.959,25.791,19.296,10001.0,19.985,25.696,19.231,10001.0,19.422,23.488,18.848,10001.0,22.186,26.303,20.832
output_gpu,mobilenetv3,Fused_BiasAdd_885158275901416228_kernel,1001.0,1.49,4.352,1.471,1001.0,1.489,1.92,1.471,10001.0,1.4880000000000002,4.416,1.471,10001.0,1.4880000000000002,4.416,1.471,10001.0,1.488,4.32,1.471,10001.0,1.486,1.888,1.471,10001.0,3.488000,4.415000,3.423000
output_gpu,mobilenetv3,Fused_Mul_fusion_16831524680947881506_kernel,1001.0,211.156,216.605,210.301,1001.0,220.421,223.74,218.556,10001.0,220.55900000000003,226.62,217.403,10001.0,220.55900000000003,226.62,217.403,10001.0,220.505,224.668,217.148,10001.0,220.788,906.227,218.524,10001.0,221.774,225.054,219.54999999999998
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_2980476326470745670_kernel,1001.0,1.325,4.064,1.311,1001.0,1.331,8.8,1.28,10001.0,1.3230000000000002,9.408,1.279,10001.0,1.3230000000000002,9.408,1.279,10001.0,1.322,4.128,1.279,10001.0,1.322,2.016,1.28,10001.0,3.319000,4.000000,3.232000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_13456095275253828121_kernel,1001.0,1.247,3.936,1.216,1001.0,1.266,3.935,1.216,10001.0,1.246,4.032,1.215,10001.0,1.246,4.032,1.215,10001.0,1.244,4.032,1.215,10001.0,1.246,9.6,1.215,10001.0,3.180000,3.744000,3.103000
output_gpu,mobilenetv3,Fused_Reshape_Add_split_8984716591323419489_kernel,1001.0,2.784,5.696000000000001,2.72,1001.0,2.414,2.912,2.3040000000000003,10001.0,2.407,9.504,2.272,10001.0,2.407,9.504,2.272,10001.0,2.408,6.56,2.271,10001.0,1.923,2.368,1.856,10001.0,4.219000,4.768000,4.096000
output_gpu,mobilenetv3,Fused_Mul_BiasAddGrad_Add_fusion_16718397833147263956_kernel,1001.0,4.508,7.327999999999999,4.448,1001.0,4.507,5.023,4.447,10001.0,4.465,7.456,4.415,10001.0,4.465,7.456,4.415,10001.0,4.466,7.584,4.384,10001.0,4.477,5.024,4.415,10001.0,6.672000,7.456000,6.560000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_3499318400568782360_kernel,1001.0,1.3090000000000002,4.256,1.279,1001.0,1.308,1.696,1.279,10001.0,1.3090000000000002,9.216,1.279,10001.0,1.3090000000000002,9.216,1.279,10001.0,1.307,4.127,1.279,10001.0,1.181,1.6,1.151,10001.0,3.304000,4.320000,3.231000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_16859602972933886463_kernel,1001.0,1.3230000000000002,4.064,1.28,1001.0,1.3219999999999998,1.7280000000000002,1.28,10001.0,1.32,4.128,1.279,10001.0,1.32,4.128,1.279,10001.0,1.32,4.128,1.279,10001.0,1.136,1.504,1.119,10001.0,3.320000,3.872000,3.232000
output_gpu,mobilenetv3,Fused_Mul_Add_fusion_12073752731691602938_kernel,1001.0,1.263,3.936,1.247,1001.0,1.27,3.968,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,4.032,1.247,10001.0,1.261,1.632,1.247,10001.0,3.218000,3.744000,3.167000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_11320170747834243405_kernel,1001.0,1.057,3.648,1.023,1001.0,1.057,1.3119999999999998,1.023,10001.0,1.056,3.712,1.023,10001.0,1.056,3.712,1.023,10001.0,1.063,3.744,1.023,10001.0,0.947,1.376,0.895,10001.0,2.814000,3.264000,2.721000
output_gpu,bert_large,Fused_Cast_fusion_4627739876415716441_kernel,1001.0,1.3769999999999998,3.392,1.3430000000000002,1001.0,1.3769999999999998,3.36,1.3430000000000002,10001.0,1.374,3.4560000000000004,1.3430000000000002,10001.0,1.374,3.4560000000000004,1.3430000000000002,10001.0,1.374,3.456,1.343,10001.0,1.381,5.856,1.343,10001.0,3.239000,4.480000,3.167000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_12033707814328470074_kernel,1001.0,1.224,2.9760000000000004,1.1840000000000002,1001.0,1.232,1.7280000000000002,1.183,10001.0,1.224,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.224,3.04,1.183,10001.0,1.072,2.08,1.023,10001.0,2.879000,3.488000,2.815000
output_gpu,bert_large,Fused_Mul_Mul_Mul_ReduceSum_split_8557813870137399269_kernel,1001.0,167.95,175.582,165.822,1001.0,169.03099999999998,174.333,167.614,10001.0,168.87599999999998,178.65200000000004,166.077,10001.0,168.87599999999998,178.65200000000004,166.077,10001.0,168.857,176.157,165.917,10001.0,168.984,174.75,167.038,10001.0,169.94400000000002,176.863,167.807000
output_gpu,bert_large,Fused_AdamWeightDecay_1963084975996941177_kernel,1001.0,1.971,5.343999999999999,1.951,1001.0,2.285,16.16,1.951,10001.0,2.117,5.28,1.951,10001.0,2.117,5.28,1.951,10001.0,2.121,5.408,1.951,10001.0,1.85,2.464,1.664,10001.0,3.916000,4.768000,3.712000
output_gpu,bert_large,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_ReduceSum_split_9548002235260671700_kernel,1001.0,7.358,13.632,7.232,1001.0,6.327000000000001,11.04,6.207999999999999,10001.0,5.354,9.951,5.215,10001.0,5.354,9.951,5.215,10001.0,5.358,9.504,5.247,10001.0,5.378,6.944,5.248,10001.0,6.966000,7.840000,6.816000
output_gpu,bert_large,Fused_Sub_Exp_ReduceSum_split_15349239229028166055_kernel,1001.0,112.944,123.231,106.27,1001.0,111.245,120.702,103.646,10001.0,106.906,121.214,102.75,10001.0,106.906,121.214,102.75,10001.0,106.137,122.462,102.846,10001.0,104.965,111.583,102.974,10001.0,112.084,120.991,110.271000
output_gpu,bert_large,Fused_ReduceSum_split_9610770192294330038_kernel,1001.0,2.356,4.672,2.335,1001.0,2.357,4.736000000000001,2.335,10001.0,2.077,4.6080000000000005,2.047,10001.0,2.077,4.6080000000000005,2.047,10001.0,2.077,4.416,2.047,10001.0,1.778,2.528,1.759,10001.0,4.082000,5.152000,4.031000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_7234642253636079393_kernel,1001.0,1.391,4.383,1.375,1001.0,1.388,1.919,1.375,10001.0,1.388,4.288,1.375,10001.0,1.388,4.288,1.375,10001.0,1.388,4.352,1.375,10001.0,1.193,1.664,1.183,10001.0,3.146000,3.840000,3.103000
output_gpu,bert_large,Fused_ReduceSum_split_5286512164795600183_kernel,1001.0,88.87200000000001,94.015,83.55,1001.0,76.188,79.391,71.807,10001.0,21.353,31.52,20.831,10001.0,21.353,31.52,20.831,10001.0,21.348000000000003,31.263,20.927,10001.0,21.089,28.351000000000003,20.703,10001.0,23.082000,30.976,22.528000
output_gpu,bert_large,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Add_Mul_R_more_split_4153845097368568711_kernel,1001.0,1946.745,2609.407,1117.0739999999998,1001.0,1910.92,3212.97,1119.181,10001.0,254.981,262.81100000000004,251.099,10001.0,254.981,262.81100000000004,251.099,10001.0,281.504,288.219,276.827,10001.0,281.568,996.883,274.876,10001.0,283.221,291.71000000000004,278.813000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_10182089266276682804_kernel,1001.0,51.276,54.175,51.231,1001.0,1.581,4.736000000000001,1.536,10001.0,1.578,4.48,1.536,10001.0,1.578,4.48,1.536,10001.0,1.578,4.64,1.535,10001.0,1.578,2.144,1.536,10001.0,3.342000,4.000000,3.295000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_10261424932202926908_kernel,1001.0,1.3530000000000002,4.16,1.3119999999999998,1001.0,1.352,2.016,1.3119999999999998,10001.0,1.351,4.288,1.311,10001.0,1.351,4.288,1.311,10001.0,1.351,4.224,1.311,10001.0,1.351,7.199,1.311,10001.0,3.080000,3.872000,3.007000
output_gpu,bert_large,Fused_ReduceMax_split_702931466894663908_kernel,1001.0,1.223,3.936,1.1840000000000002,1001.0,1.229,8.863999999999999,1.1840000000000002,10001.0,1.396,3.2960000000000003,1.375,10001.0,1.396,3.2960000000000003,1.375,10001.0,1.396,3.488,1.375,10001.0,1.201,1.568,1.183,10001.0,3.037000,3.680000,2.975000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_5219607149713909801_kernel,1001.0,1.053,3.648,1.023,1001.0,1.055,1.408,1.023,10001.0,1.056,3.712,1.023,10001.0,1.056,3.712,1.023,10001.0,1.053,3.712,1.023,10001.0,1.061,6.912,1.023,10001.0,2.997000,3.456000,2.943000
output_gpu,bert_large,Fused_Mul_split_3529573971242025759_kernel,1001.0,1.232,3.936,1.215,1001.0,1.119,1.472,1.087,10001.0,1.23,4.0,1.215,10001.0,1.23,4.0,1.215,10001.0,1.23,4.032,1.215,10001.0,1.229,1.664,1.12,10001.0,3.164000,3.744000,3.103000
output_gpu,bert_large,Fused_AdamWeightDecay_15275906070968866735_kernel,1001.0,147.665,154.142,146.814,1001.0,148.884,879.985,147.19799999999998,10001.0,148.143,154.238,147.261,10001.0,148.143,154.238,147.261,10001.0,147.453,154.013,146.59,10001.0,148.28,848.501,147.198,10001.0,150.732,155.006,149.471
output_gpu,bert_large,Fused_Reshape_Transpose_fusion_10392838085547651061_kernel,1001.0,47.332,52.895,46.144,1001.0,45.583,52.127,44.447,10001.0,42.865,49.087,42.24,10001.0,42.865,49.087,42.24,10001.0,42.902,49.152,42.175,10001.0,94.533,698.519,94.175,10001.0,106.31,107.583,105.95100000000001
output_gpu,bert_large,Fused_AdamWeightDecay_17862896559514098625_kernel,1001.0,2.002,5.088,1.983,1001.0,1.894,2.432,1.7280000000000002,10001.0,2.198,5.152,1.984,10001.0,2.198,5.152,1.984,10001.0,2.19,5.152,1.983,10001.0,1.846,2.432,1.695,10001.0,3.681000,4.608000,3.616000
output_gpu,bert_large,Fused_Add_split_17831152759103575343_kernel,1001.0,1.243,3.936,1.215,1001.0,1.24,1.6,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.0,1.215,10001.0,1.24,4.032,1.215,10001.0,1.069,1.472,1.024,10001.0,2.953000,3.456000,2.911000
output_gpu,bert_large,Fused_Mul_ReduceSum_split_6875858508543751623_kernel,1001.0,2.166,4.6080000000000005,2.143,1001.0,1.964,2.8480000000000003,1.951,10001.0,2.106,9.056,2.079,10001.0,2.106,9.056,2.079,10001.0,2.105,9.28,2.079,10001.0,1.805,2.592,1.791,10001.0,4.129000,5.376000,4.063000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_5882757187289266061_kernel,1001.0,1.3730000000000002,4.16,1.3430000000000002,1001.0,1.371,1.856,1.3430000000000002,10001.0,1.37,4.256,1.3430000000000002,10001.0,1.37,4.256,1.3430000000000002,10001.0,1.37,4.32,1.343,10001.0,1.178,1.632,1.151,10001.0,3.109000,3.712000,3.040000
output_gpu,bert_large,Fused_Mul_Mul_Add_split_8316169649112027801_kernel,1001.0,47.609,53.023,46.688,1001.0,45.495,48.927,44.67100000000001,10001.0,45.183,51.679,44.063,10001.0,45.183,51.679,44.063,10001.0,45.201,50.94300000000001,44.159,10001.0,43.277,47.295,42.272,10001.0,46.923,51.455,46.175000000000004
output_gpu,bert_large,Fused_Tanh_fusion_13362060561255152474_kernel,1001.0,1.499,4.224,1.471,1001.0,1.289,1.664,1.248,10001.0,1.496,4.447,1.471,10001.0,1.496,4.447,1.471,10001.0,1.496,4.352,1.471,10001.0,1.499,4.544,1.471,10001.0,3.268000,3.776000,3.200000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_15482918794471314863_kernel,1001.0,50.963,53.792,50.943000000000005,1001.0,1.211,4.128,1.183,10001.0,1.206,3.936,1.183,10001.0,1.206,3.936,1.183,10001.0,1.207,3.968,1.183,10001.0,1.207,1.6,1.183,10001.0,3.212000,3.808000,3.135000
output_gpu,bert_large,Fused_Minimum_fusion_13830424263623313460_kernel,1001.0,1.374,3.36,1.3430000000000002,1001.0,1.375,3.36,1.3430000000000002,10001.0,1.372,3.424,1.3430000000000002,10001.0,1.372,3.424,1.3430000000000002,10001.0,1.372,3.424,1.343,10001.0,1.179,1.696,1.151,10001.0,3.234000,3.968000,3.167000
output_gpu,bert_large,Fused_Mul_Add_Rsqrt_split_18366955607753806381_kernel,1001.0,1.274,4.064,1.247,1001.0,1.271,1.632,1.247,10001.0,1.272,4.064,1.247,10001.0,1.272,4.064,1.247,10001.0,1.271,4.064,1.247,10001.0,1.194,2.272,1.056,10001.0,3.021000,3.648000,2.944000
output_gpu,bert_large,Fused_AdamWeightDecay_15070480311508545235_kernel,1001.0,147.542,154.078,146.75,1001.0,147.83700000000002,154.045,146.846,10001.0,148.911,154.81300000000005,147.87,10001.0,148.911,154.81300000000005,147.87,10001.0,148.127,154.333,147.22899999999996,10001.0,148.174,154.846,147.07,10001.0,149.87800000000001,154.59,148.959
output_gpu,bert_large,Fused_AdamWeightDecay_6463732682852089878_kernel,1001.0,2.123,5.12,1.92,1001.0,2.105,2.688,1.92,10001.0,2.132,9.279,1.92,10001.0,2.132,9.279,1.92,10001.0,2.085,5.216,1.919,10001.0,1.943,2.72,1.919,10001.0,3.693000,4.641000,3.647000
output_gpu,bert_large,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_14596789223470331286_kernel,1001.0,1.354,4.032,1.3119999999999998,1001.0,1.352,1.696,1.3119999999999998,10001.0,1.352,4.128,1.311,10001.0,1.352,4.128,1.311,10001.0,1.351,4.096,1.311,10001.0,1.352,1.696,1.311,10001.0,3.293000,3.840000,3.231000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_6600323157333801920_kernel,1001.0,1.0490000000000002,3.616,1.023,1001.0,1.047,1.3119999999999998,1.023,10001.0,1.052,3.872,1.023,10001.0,1.052,3.872,1.023,10001.0,1.046,3.712,1.023,10001.0,1.452,36.416,1.023,10001.0,3.006000,3.551000,2.943000
output_gpu,bert_large,Fused_Mul_ReduceSum_split_2560788304788358183_kernel,1001.0,94.564,98.879,87.42299999999999,1001.0,96.282,97.47,96.191,10001.0,2.169,5.376,2.143,10001.0,2.169,5.376,2.143,10001.0,2.169,5.472,2.143,10001.0,1.864,2.56,1.824,10001.0,3.854000,4.800000,3.775000
output_gpu,bert_large,Fused_AdamWeightDecay_55711373873461124_kernel,1001.0,52.511,55.103,51.903,1001.0,2.41,3.488,2.3040000000000003,10001.0,2.422,5.92,2.208,10001.0,2.422,5.92,2.208,10001.0,2.186,5.888,2.143,10001.0,2.069,2.976,1.856,10001.0,4.125000,5.088000,3.904000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_3511795677805219761_kernel,1001.0,45.192,50.591,44.735,1001.0,45.19,49.151,44.703,10001.0,44.881,50.94300000000001,44.159,10001.0,44.881,50.94300000000001,44.159,10001.0,44.898,50.815,44.223,10001.0,44.731,48.095,44.223,10001.0,47.193,51.072,46.495000000000005
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_12101948072875812668_kernel,1001.0,1.223,2.9760000000000004,1.1840000000000002,1001.0,1.221,1.536,1.1840000000000002,10001.0,1.208,3.04,1.183,10001.0,1.208,3.04,1.183,10001.0,1.208,3.04,1.183,10001.0,1.208,1.664,1.183,10001.0,2.870000,3.520000,2.783000
output_gpu,bert_large,Fused_Greater_Cast_Sub_Minimum_Mul_Sub_Pow_Mul_Mul_Cast_Mul_Mul_Add_split_2827037989450780560_kernel,1001.0,1.608,3.744,1.568,1001.0,1.38,2.176,1.344,10001.0,1.606,3.808,1.568,10001.0,1.606,3.808,1.568,10001.0,1.607,9.312,1.568,10001.0,1.38,2.048,1.343,10001.0,3.233000,4.096000,3.167000
output_gpu,bert_large,Fused_Mul_Add_Rsqrt_split_7095141008203027021_kernel,1001.0,1.264,3.968,1.247,1001.0,1.266,3.968,1.247,10001.0,1.262,4.063,1.247,10001.0,1.262,4.063,1.247,10001.0,1.263,4.064,1.247,10001.0,1.262,1.696,1.247,10001.0,2.978000,6.912000,2.912000
output_gpu,bert_large,Fused_Mul_LessEqual_Cast_Mul_Add_ReduceSum_split_14087634351628807710_kernel,1001.0,107.155,114.622,106.143,1001.0,107.073,112.478,105.758,10001.0,106.035,112.061,105.118,10001.0,106.035,112.061,105.118,10001.0,106.048,112.765,105.118,10001.0,106.053,109.918,104.99,10001.0,107.768,111.583,106.815000
output_gpu,bert_large,Fused_Sub_Mul_ReduceSum_split_9576479946113568716_kernel,1001.0,49.583,55.935,48.863,1001.0,48.696000000000005,53.376000000000005,48.032,10001.0,46.345,52.767,45.375,10001.0,46.345,52.767,45.375,10001.0,46.351000000000006,53.023,45.407,10001.0,46.274,51.808,45.376,10001.0,48.518000,52.928000000000004,47.520000
output_gpu,bert_large,Fused_Reshape_ReduceSum_Add_RealDiv_split_6710312135617753113_kernel,1001.0,2.407,4.768,2.367,1001.0,2.407,3.2960000000000003,2.367,10001.0,2.138,4.448,2.111,10001.0,2.138,4.448,2.111,10001.0,2.138,4.48,2.111,10001.0,1.833,2.528,1.823,10001.0,4.125000,5.280000,4.063000
output_gpu,bert_large,Fused_Mul_ReduceSum_split_13893736644811418886_kernel,1001.0,2.255,4.704,2.239,1001.0,2.255,3.232,2.239,10001.0,2.119,5.28,2.079,10001.0,2.119,5.28,2.079,10001.0,2.119,5.312,2.079,10001.0,1.819,2.528,1.791,10001.0,3.850000,4.768000,3.775000
output_gpu,bert_large,Fused_Mul_split_8987365791484717414_kernel,1001.0,1.228,3.904,1.1840000000000002,1001.0,1.114,1.472,1.087,10001.0,1.225,4.0,1.1840000000000002,10001.0,1.225,4.0,1.1840000000000002,10001.0,1.225,3.999,1.184,10001.0,1.226,3.967,1.184,10001.0,3.200000,3.776000,3.135000
output_gpu,bert_large,Fused_ReduceSum_split_12090643136724717713_kernel,1001.0,227.552,242.173,219.965,1001.0,227.646,241.18,220.284,10001.0,90.629,102.782,89.919,10001.0,90.629,102.782,89.919,10001.0,90.56,102.27,89.79,10001.0,90.522,99.743,89.855,10001.0,93.241,101.983,92.095000
output_gpu,bert_large,Fused_ReduceSum_split_999075897968895260_kernel,1001.0,3.573,7.744,3.519,1001.0,3.573,7.2,3.519,10001.0,3.498,8.16,3.455,10001.0,3.498,8.16,3.455,10001.0,3.497,7.584,3.455,10001.0,2.997,4.511,2.944,10001.0,5.579000,7.071000,5.503000
output_gpu,bert_large,Fused_GeLU_10673146742075701310_kernel,1001.0,173.706,179.134,172.926,1001.0,173.61,179.454,172.957,10001.0,173.364,179.708,172.701,10001.0,173.364,179.708,172.701,10001.0,173.366,179.517,172.701,10001.0,173.46900000000002,873.3,172.60500000000002,10001.0,176.164,179.999,174.974
output_gpu,bert_large,Fused_ReduceSum_split_10404330036365636525_kernel,1001.0,15.029000000000002,17.92,15.007,1001.0,15.015,15.36,14.975,10001.0,2.79,5.632000000000001,2.72,10001.0,2.79,5.632000000000001,2.72,10001.0,2.892,6.08,2.848,10001.0,2.49,3.296,2.336,10001.0,4.717000,5.472000,4.640000
output_gpu,bert_large,Fused_ReduceSum_split_3588669061737312733_kernel,1001.0,22.201,32.736,21.631,1001.0,21.877,30.24,21.408,10001.0,21.749,33.407,21.216,10001.0,21.749,33.407,21.216,10001.0,21.699,33.343,21.216,10001.0,21.467,30.496,21.088,10001.0,23.731000,32.799,23.296000
output_gpu,bert_large,Fused_Mul_split_10211604690441937993_kernel,1001.0,1.225,3.904,1.1840000000000002,1001.0,1.12,6.752000000000001,1.087,10001.0,1.226,4.0,1.183,10001.0,1.226,4.0,1.183,10001.0,1.225,4.0,1.183,10001.0,1.225,1.664,1.184,10001.0,3.208000,4.064000,3.135000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.1840000000000002,1001.0,1.231,1.664,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.104,1.183,10001.0,1.207,1.536,1.183,10001.0,3.071000,3.584000,3.007000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_15873506106088978111_kernel,1001.0,45.192,50.56,44.735,1001.0,45.2,51.231,44.735,10001.0,44.93,50.783,44.255,10001.0,44.93,50.783,44.255,10001.0,44.988,50.815,44.287,10001.0,44.802,48.479,44.255,10001.0,47.332,51.071,46.687
output_gpu,bert_large,Fused_ReduceSum_split_13706748473241630349_kernel,1001.0,1.645,4.192,1.6,1001.0,1.643,1.856,1.6,10001.0,1.64,3.552,1.599,10001.0,1.64,3.552,1.599,10001.0,1.64,3.552,1.599,10001.0,1.41,1.76,1.375,10001.0,3.277000,3.808000,3.200000
output_gpu,bert_large,Fused_Sub_Mul_Mul_split_7488459301201101106_kernel,1001.0,124.279,129.662,123.646,1001.0,121.551,124.606,120.798,10001.0,121.466,127.006,120.414,10001.0,121.466,127.006,120.414,10001.0,121.463,126.782,120.445,10001.0,121.546,125.535,120.67,10001.0,123.166,126.943,122.399
output_gpu,bert_large,Fused_Reshape_Neg_Mul_split_5784705830830173820_kernel,1001.0,1.24,3.936,1.215,1001.0,1.242,4.287,1.215,10001.0,1.236,4.192,1.215,10001.0,1.236,4.192,1.215,10001.0,1.236,4.0,1.215,10001.0,1.069,1.472,1.024,10001.0,2.968000,3.488000,2.880000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_2235168741890136822_kernel,1001.0,1.556,3.552,1.535,1001.0,1.555,2.08,1.535,10001.0,1.5530000000000002,3.776,1.535,10001.0,1.5530000000000002,3.776,1.535,10001.0,1.553,3.584,1.535,10001.0,1.333,1.856,1.311,10001.0,3.146000,3.872000,3.103000
output_gpu,bert_large,Fused_Sub_Mul_ReduceSum_split_6305333425251864818_kernel,1001.0,6.113,10.176,6.079,1001.0,5.255,7.104,5.215,10001.0,4.759,8.671,4.638999999999999,10001.0,4.759,8.671,4.638999999999999,10001.0,4.754,8.768,4.639,10001.0,4.769,9.088,4.64,10001.0,6.413000,7.424000,6.303000
output_gpu,bert_large,Fused_Mul_Add_ReduceMax_split_15776480582319130176_kernel,1001.0,111.2,120.542,103.839,1001.0,113.001,121.342,111.87,10001.0,103.601,120.67,100.254,10001.0,103.601,120.67,100.254,10001.0,103.791,120.83,100.446,10001.0,102.33,108.287,100.479,10001.0,111.563,127.61500000000001,108.928000
output_gpu,bert_large,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_15211520463956997496_kernel,1001.0,1.379,4.032,1.344,1001.0,1.376,1.7280000000000002,1.3430000000000002,10001.0,1.376,4.384,1.3430000000000002,10001.0,1.376,4.384,1.3430000000000002,10001.0,1.376,4.128,1.343,10001.0,1.377,1.728,1.343,10001.0,3.347000,3.840000,3.295000
output_gpu,bert_large,Fused_AdamWeightDecay_17971378980737717084_kernel,1001.0,20.878,22.336,20.352,1001.0,21.239,22.399,20.064,10001.0,20.979,22.304,20.351,10001.0,20.979,22.304,20.351,10001.0,21.155,22.528,20.415,10001.0,20.821,715.735,19.327,10001.0,22.946000,24.384000,21.631000
output_gpu,bert_large,Fused_Mul_Add_Rsqrt_split_18088083862193468307_kernel,1001.0,1.274,3.936,1.247,1001.0,1.275,4.32,1.247,10001.0,1.272,4.032,1.247,10001.0,1.272,4.032,1.247,10001.0,1.271,4.032,1.247,10001.0,1.098,1.536,1.055,10001.0,3.005000,3.520000,2.944000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_9107615871551459299_kernel,1001.0,1.863,4.672,1.824,1001.0,1.864,2.176,1.824,10001.0,1.863,4.6080000000000005,1.823,10001.0,1.863,4.6080000000000005,1.823,10001.0,1.863,4.608,1.823,10001.0,1.865,2.304,1.823,10001.0,3.932000,4.896000,3.871000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_12704408583507156626_kernel,1001.0,1.3530000000000002,4.192,1.3119999999999998,1001.0,1.356,4.48,1.3430000000000002,10001.0,1.351,4.288,1.311,10001.0,1.351,4.288,1.311,10001.0,1.351,4.256,1.311,10001.0,1.351,4.48,1.311,10001.0,3.085000,3.776000,3.008000
output_gpu,bert_large,Fused_Mul_Mul_Add_Mul_LessEqual_Cast_Mul_split_18209420727665645056_kernel,1001.0,87.505,93.311,86.303,1001.0,88.26700000000001,95.007,87.42200000000001,10001.0,88.17399999999999,94.079,87.134,10001.0,88.17399999999999,94.079,87.134,10001.0,88.18700000000001,94.59,86.846,10001.0,85.091,789.493,83.839,10001.0,92.531,96.351,91.007
output_gpu,bert_large,Fused_Mul_ReduceSum_split_16423130798082889369_kernel,1001.0,2.522,4.992,2.495,1001.0,2.521,3.487,2.495,10001.0,2.155,5.312,2.143,10001.0,2.155,5.312,2.143,10001.0,2.155,5.344,2.143,10001.0,1.851,2.56,1.823,10001.0,3.830000,4.736000,3.775000
output_gpu,bert_large,Fused_Mul_Mul_Mul_Mul_Add_Mul_Mul_Mul_Add_Mul_Add_Mul_Mul_Mul_Add_Mul_Tanh_Add_M_more_split_7771633921857610865_kernel,1001.0,15.604,19.455,14.688,1001.0,17.441,18.816,16.256,10001.0,17.6,19.968,16.127,10001.0,17.6,19.968,16.127,10001.0,17.616,20.096,16.16,10001.0,17.611,19.263,16.192,10001.0,19.669000,20.928000,18.464000
output_gpu,bert_large,Fused_Sub_Exp_ReduceSum_split_18401414507023603866_kernel,1001.0,1452.884,1562.764,1421.934,1001.0,1430.263,2091.5170000000003,1420.201,10001.0,221.825,227.835,220.38,10001.0,221.825,227.835,220.38,10001.0,221.846,229.116,220.572,10001.0,221.802,225.949,220.669,10001.0,224.723,229.822,223.486000
output_gpu,bert_large,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.3730000000000002,3.3280000000000003,1.3430000000000002,1001.0,1.423,1.952,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.42,3.488,1.407,10001.0,1.256,1.984,1.215,10001.0,3.061000,3.744000,3.007000
output_gpu,bert_large,Fused_ReduceSum_split_15820032083568895392_kernel,1001.0,1.223,3.968,1.1840000000000002,1001.0,1.224,4.352,1.1840000000000002,10001.0,1.395,3.3280000000000003,1.375,10001.0,1.395,3.3280000000000003,1.375,10001.0,1.396,9.312,1.375,10001.0,1.396,1.824,1.375,10001.0,3.037000,3.743000,2.975000
output_gpu,bert_large,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_13095791597391198535_kernel,1001.0,1.379,4.064,1.3430000000000002,1001.0,1.3769999999999998,1.76,1.3430000000000002,10001.0,1.3769999999999998,4.192,1.3430000000000002,10001.0,1.3769999999999998,4.192,1.3430000000000002,10001.0,1.376,4.127,1.343,10001.0,1.376,1.728,1.343,10001.0,3.114000,3.744000,3.071000
output_gpu,bert_large,Fused_RealDiv_Mul_Add_split_11550133980623514999_kernel,1001.0,1.661,3.84,1.631,1001.0,1.509,2.176,1.472,10001.0,1.66,3.872,1.631,10001.0,1.66,3.872,1.631,10001.0,1.66,3.872,1.631,10001.0,1.66,2.368,1.631,10001.0,3.503000,4.384000,3.423000
output_gpu,bert_large,Fused_Transpose_split_16627034894983593162_kernel,1001.0,46.847,51.968,45.472,1001.0,44.488,50.07899999999999,43.775,10001.0,42.729000000000006,49.119,42.07899999999999,10001.0,42.729000000000006,49.119,42.07899999999999,10001.0,42.79,49.31200000000001,42.111,10001.0,98.039,918.867,94.238,10001.0,106.335,107.072,105.98200000000001
output_gpu,bert_large,Fused_ReduceSum_split_8843945041846172220_kernel,1001.0,22.209,32.895,21.728,1001.0,21.889,30.463,21.439,10001.0,21.782,33.119,21.279,10001.0,21.782,33.119,21.279,10001.0,21.776,33.056000000000004,21.247,10001.0,21.462,29.536,21.088,10001.0,23.728000,33.12,23.295000
output_gpu,bert_large,Fused_Mul_ReduceSum_split_11793891151941006127_kernel,1001.0,1.408,3.424,1.375,1001.0,1.4069999999999998,1.92,1.375,10001.0,1.406,3.488,1.375,10001.0,1.406,3.488,1.375,10001.0,1.409,9.632,1.375,10001.0,1.21,1.856,1.183,10001.0,3.053000,4.032000,3.007000
output_gpu,bert_large,Fused_ReduceSum_split_3232986928454513960_kernel,1001.0,333.63,375.068,300.572,1001.0,316.41700000000003,368.442,298.268,10001.0,98.523,109.182,97.278,10001.0,98.523,109.182,97.278,10001.0,95.896,106.27,94.878,10001.0,96.032,805.589,94.847,10001.0,98.408,107.199,97.119000
output_gpu,bert_large,Fused_RealDiv_Mul_LessEqual_Cast_Mul_split_18279394561205515519_kernel,1001.0,217.228,223.677,216.221,1001.0,211.389,215.676,210.172,10001.0,211.623,217.371,210.268,10001.0,211.623,217.371,210.268,10001.0,211.607,217.596,210.204,10001.0,211.743,912.339,210.334,10001.0,213.667,217.886,212.41299999999998
output_gpu,bert_large,Fused_ReduceSum_split_15671849393298523079_kernel,1001.0,3.3280000000000003,6.303999999999999,3.295,1001.0,3.323,4.032,3.295,10001.0,1.79,4.544,1.759,10001.0,1.79,4.544,1.759,10001.0,1.523,4.288,1.503,10001.0,1.524,4.607,1.503,10001.0,3.261000,3.743000,3.199000
output_gpu,bert_large,Fused_Reshape_ReduceSum_split_6943325950722558809_kernel,1001.0,88.802,93.695,83.551,1001.0,87.416,90.174,83.48700000000001,10001.0,21.268,31.327,20.512,10001.0,21.268,31.327,20.512,10001.0,21.348000000000003,31.039,20.896,10001.0,21.094,28.736,20.704,10001.0,23.287000,31.488000000000003,22.175000
output_gpu,bert_large,Fused_Mul_Mul_Add_Add_Mul_Mul_ReduceSum_split_14590059488980906875_kernel,1001.0,1166.647,1431.9820000000002,847.158,1001.0,1173.954,1972.575,804.819,10001.0,108.499,114.846,106.302,10001.0,108.499,114.846,106.302,10001.0,110.977,114.494,109.278,10001.0,110.963,786.518,108.927,10001.0,113.03399999999999,117.343,111.326000
output_gpu,bert_large,Fused_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_934787783141644352_kernel,1001.0,1.511,4.192,1.472,1001.0,1.3,2.144,1.279,10001.0,1.508,4.256,1.471,10001.0,1.508,4.256,1.471,10001.0,1.508,4.256,1.471,10001.0,1.371,1.696,1.343,10001.0,3.254000,3.775000,3.199000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_3785957052720432159_kernel,1001.0,1.053,3.648,1.023,1001.0,1.052,1.344,1.023,10001.0,1.0590000000000002,3.712,1.023,10001.0,1.0590000000000002,3.712,1.023,10001.0,1.054,3.712,1.023,10001.0,1.054,1.472,1.023,10001.0,2.995000,3.456000,2.912000
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_13306996589425526854_kernel,1001.0,12.988,18.112,12.48,1001.0,12.98,17.567999999999998,12.319,10001.0,12.943,17.919,12.512,10001.0,12.943,17.919,12.512,10001.0,12.847,17.951,12.095,10001.0,12.948,17.952,12.48,10001.0,15.547,18.144000000000002,14.879
output_gpu,bert_large,Fused_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_4581979027189965273_kernel,1001.0,1.5119999999999998,4.352,1.472,1001.0,1.298,1.632,1.279,10001.0,1.51,8.992,1.471,10001.0,1.51,8.992,1.471,10001.0,1.508,4.256,1.471,10001.0,1.373,2.08,1.343,10001.0,3.250000,3.744000,3.199000
output_gpu,bert_large,Fused_Add_Reshape_Reshape_Add_split_9958957112006768850_kernel,1001.0,63.534,69.407,62.655,1001.0,63.521,70.143,62.75,10001.0,63.414,69.726,62.527,10001.0,63.414,69.726,62.527,10001.0,63.443000000000005,70.68599999999998,62.558,10001.0,64.133,765.6859999999999,63.135,10001.0,66.077,70.752,65.055
output_gpu,bert_large,Fused_Reshape_Cast_Reshape_Sub_Mul_split_8135735155960499556_kernel,1001.0,1.247,3.936,1.215,1001.0,1.245,1.6,1.215,10001.0,1.242,4.0,1.215,10001.0,1.242,4.0,1.215,10001.0,1.242,4.032,1.215,10001.0,1.242,1.632,1.215,10001.0,3.204000,3.712000,3.135000
output_gpu,bert_large,Fused_Log_split_9306536037813036959_kernel,1001.0,1.295,3.968,1.279,1001.0,1.112,1.44,1.087,10001.0,1.293,9.087,1.279,10001.0,1.293,9.087,1.279,10001.0,1.292,4.064,1.279,10001.0,1.113,1.472,1.087,10001.0,3.227000,4.576000,3.167000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_1366229370543639355_kernel,1001.0,1.054,3.903,1.023,1001.0,1.092,1.856,1.023,10001.0,1.055,3.712,1.023,10001.0,1.055,3.712,1.023,10001.0,1.055,3.743,1.023,10001.0,1.055,3.296,1.023,10001.0,2.818000,3.264000,2.720000
output_gpu,bert_large,Fused_AdamWeightDecay_1576857767906520669_kernel,1001.0,39.166,44.896,38.655,1001.0,39.223,42.24,38.624,10001.0,39.343,44.288,38.559,10001.0,39.343,44.288,38.559,10001.0,39.188,44.607,38.463,10001.0,38.898,42.08,38.239,10001.0,41.722000,44.640000,40.544000
output_gpu,bert_large,Fused_Reshape_Transpose_fusion_950114872139856602_kernel,1001.0,47.341,53.055,46.144,1001.0,45.595,50.239,44.415,10001.0,42.86,49.215,42.239,10001.0,42.86,49.215,42.239,10001.0,42.883,49.279,42.207,10001.0,96.664,110.014,94.175,10001.0,106.309,107.61500000000001,105.98200000000001
output_gpu,bert_large,Fused_Mul_Greater_BroadcastTo_Select_Sqrt_Select_Maximum_Div_split_9071028844397667736_kernel,1001.0,4.807,7.808,4.735,1001.0,4.809,5.28,4.735,10001.0,4.8,7.84,4.735,10001.0,4.8,7.84,4.735,10001.0,4.802,7.967999999999999,4.735,10001.0,4.802,5.472,4.735,10001.0,6.555000,7.712000,6.463000
output_gpu,bert_large,Fused_BroadcastTo_inplace_assign_builder_12371805005481020605_kernel,1001.0,1.045,3.936,1.023,1001.0,1.045,1.3119999999999998,1.023,10001.0,1.0490000000000002,3.68,1.023,10001.0,1.0490000000000002,3.68,1.023,10001.0,1.054,3.68,1.023,10001.0,1.049,1.472,1.023,10001.0,2.822000,3.264000,2.751000
output_gpu,bert_large,Fused_AdamWeightDecay_8330669427124296228_kernel,1001.0,2.121,4.544,2.08,1001.0,2.121,3.264,2.08,10001.0,2.32,4.704,2.112,10001.0,2.32,4.704,2.112,10001.0,2.118,4.672,2.079,10001.0,2.261,3.008,2.079,10001.0,3.690000,4.896000,3.647000
output_gpu,bert_large,Fused_Mul_Mul_Add_split_7993648460547048852_kernel,1001.0,5.535,8.544,5.44,1001.0,5.023,5.92,4.896,10001.0,5.017,8.192,4.864,10001.0,5.017,8.192,4.864,10001.0,5.007,8.191,4.832,10001.0,3.995,4.5440000000000005,3.872,10001.0,6.596000,7.456000,6.432000
output_gpu,bert_large,Fused_Reshape_Mul_Mul_Add_Mul_Mul_Mul_Add_Sqrt_Add_RealDiv_Mul_Add_Mul_Sub_Assig_more_split_11870470574401437676_kernel,1001.0,1084.911,1091.122,1082.738,1001.0,1086.26,1760.515,1082.9260000000002,10001.0,1085.758,1092.843,1080.395,10001.0,1085.758,1092.843,1080.395,10001.0,1085.071,1090.764,1080.396,10001.0,1085.58,1813.255,1082.738,10001.0,1086.5910000000001,1089.494,1082.038
output_gpu,VGG16,Fused_Cast_BiasAdd_fusion_11015874976055949002_kernel,1001.0,3.372,6.207999999999999,3.2,1001.0,2.155,4.928,2.111,10001.0,2.147,4.928,2.08,10001.0,2.147,4.928,2.08,10001.0,2.148,4.96,2.111,10001.0,2.153,2.592,2.111,10001.0,4.124000,4.737000,4.063000
output_gpu,VGG16,Fused_Transpose_fusion_8786315546448402472_kernel,1001.0,2.182,4.992,2.143,1001.0,2.182,2.592,2.144,10001.0,2.175,5.024,2.143,10001.0,2.175,5.024,2.143,10001.0,2.175,5.12,2.143,10001.0,1.683,2.048,1.632,10001.0,3.604000,4.160000,3.551000
output_gpu,VGG16,Fused_Transpose_fusion_2651250179621429874_kernel,1001.0,2.338,5.28,2.303,1001.0,2.338,5.6,2.303,10001.0,2.331,5.184,2.272,10001.0,2.331,5.184,2.272,10001.0,2.332,5.216,2.303,10001.0,2.487,2.944,2.4,10001.0,4.219000,4.800000,4.127000
output_gpu,VGG16,Fused_Transpose_fusion_2109480782537172349_kernel,1001.0,1.27,4.0,1.247,1001.0,1.271,4.32,1.247,10001.0,1.266,4.096,1.247,10001.0,1.266,4.096,1.247,10001.0,1.267,4.32,1.247,10001.0,1.267,4.288,1.247,10001.0,3.012000,4.416000,2.944000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_6259131806528633217_kernel,1001.0,1.2819999999999998,4.16,1.247,1001.0,1.2819999999999998,1.664,1.247,10001.0,1.2819999999999998,6.72,1.247,10001.0,1.2819999999999998,6.72,1.247,10001.0,1.282,8.448,1.247,10001.0,1.16,1.536,1.119,10001.0,3.033000,3.552000,2.975000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_13531162913956986807_kernel,1001.0,2.222,5.247999999999999,2.176,1001.0,2.26,2.72,2.239,10001.0,2.26,5.376,2.207,10001.0,2.26,5.376,2.207,10001.0,2.26,5.247,2.207,10001.0,1.989,2.528,1.951,10001.0,4.026000,4.672000,3.967000
output_gpu,VGG16,Fused_Transpose_fusion_11612551847347472464_kernel,1001.0,23.974,27.072,23.68,1001.0,24.567,27.776,24.064,10001.0,23.933000000000003,27.903,21.983,10001.0,23.933000000000003,27.903,21.983,10001.0,23.644,27.583,21.055,10001.0,15.204,17.087999999999997,14.112,10001.0,17.842,19.040000000000003,17.408
output_gpu,VGG16,Fused_Cast_fusion_9503007563136944121_kernel,1001.0,785.7660000000001,813.814,781.206,1001.0,784.326,1491.146,780.693,10001.0,783.085,812.432,772.177,10001.0,783.085,812.432,772.177,10001.0,783.49,813.6489999999999,772.4019999999999,10001.0,882.547,1572.781,866.101,10001.0,799.063,805.9770000000001,789.817
output_gpu,VGG16,Fused_DropoutGrad_7348523398647405_kernel,1001.0,2.343,5.6,2.271,1001.0,2.012,2.624,1.951,10001.0,2.337,5.28,2.271,10001.0,2.337,5.28,2.271,10001.0,2.337,5.408,2.271,10001.0,3.011,7.424,2.88,10001.0,4.538000,5.248000,4.415000
output_gpu,VGG16,Fused_Dropout_14441653764211308227_kernel,1001.0,2.889,5.728,2.815,1001.0,2.882,3.392,2.815,10001.0,2.931,5.983,2.8160000000000003,10001.0,2.931,5.983,2.8160000000000003,10001.0,2.933,5.952,2.816,10001.0,5.594,8.799,5.376,10001.0,5.120000,5.920000,4.991000
output_gpu,VGG16,Fused_BroadcastTo_inplace_assign_builder_6600323157333801920_kernel,1001.0,1.0490000000000002,3.648,1.023,1001.0,1.051,3.968,1.023,10001.0,1.046,3.712,1.023,10001.0,1.046,3.712,1.023,10001.0,1.054,3.712,1.023,10001.0,1.048,1.44,1.023,10001.0,3.007000,3.552000,2.943000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_746009665116862803_kernel,1001.0,112.237,121.854,111.039,1001.0,21.274,25.024,20.672,10001.0,20.936,27.615,19.967,10001.0,20.936,27.615,19.967,10001.0,20.931,27.711,19.967,10001.0,19.749,26.559,18.752,10001.0,22.108000,26.943,21.024000
output_gpu,VGG16,Fused_Transpose_fusion_665433402975525103_kernel,1001.0,1.713,4.448,1.664,1001.0,1.707,2.208,1.664,10001.0,1.705,4.64,1.663,10001.0,1.705,4.64,1.663,10001.0,1.706,8.607,1.663,10001.0,1.663,1.984,1.6,10001.0,3.402000,4.032000,3.327000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_10572641205232097944_kernel,1001.0,5.586,8.607999999999999,5.472,1001.0,5.622999999999999,6.176,5.535,10001.0,5.626,8.799,5.535,10001.0,5.626,8.799,5.535,10001.0,5.626,8.767999999999999,5.535,10001.0,3.394,4.288,3.328,10001.0,5.577000,8.800000,5.472000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_7346313530950028297_kernel,1001.0,1.651,4.544,1.631,1001.0,1.712,2.08,1.695,10001.0,1.713,8.704,1.663,10001.0,1.713,8.704,1.663,10001.0,1.712,4.544,1.664,10001.0,1.728,4.448,1.664,10001.0,3.505000,4.064000,3.423000
output_gpu,VGG16,Fused_Mul_split_1208118231251741080_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.371,3.392,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.38,2.016,1.343,10001.0,3.082000,5.983000,3.008000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_10434588479252844865_kernel,1001.0,3.105,6.176,2.9760000000000004,1001.0,3.109,3.552,3.071,10001.0,3.11,6.016,3.071,10001.0,3.11,6.016,3.071,10001.0,3.11,6.016,3.071,10001.0,2.544,3.04,2.495,10001.0,4.603000,5.472000,4.543000
output_gpu,VGG16,Fused_Cast_fusion_17328680125618217250_kernel,1001.0,33.871,39.327,33.471,1001.0,33.858000000000004,37.215,33.343,10001.0,33.257000000000005,39.487,32.031,10001.0,33.257000000000005,39.487,32.031,10001.0,33.412,39.455,32.319,10001.0,36.596,40.543,34.655,10001.0,36.077999999999996,39.264,35.071
output_gpu,VGG16,Fused_Transpose_fusion_14665789518885984689_kernel,1001.0,8.368,11.424,8.256,1001.0,8.347999999999999,9.151,8.224,10001.0,8.357999999999999,11.616,8.224,10001.0,8.357999999999999,11.616,8.224,10001.0,8.306,11.424,7.103,10001.0,9.811,13.344,8.895999999999999,10001.0,12.488000,13.183000,12.192000
output_gpu,VGG16,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.183,1001.0,1.225,3.168,1.1840000000000002,10001.0,1.222,3.008,1.183,10001.0,1.222,3.008,1.183,10001.0,1.222,3.072,1.183,10001.0,1.207,1.536,1.183,10001.0,2.882000,3.360000,2.815000
output_gpu,VGG16,Fused_Cast_fusion_7783046391655246810_kernel,1001.0,132.05200000000002,138.974,129.63,1001.0,133.054,135.774,132.286,10001.0,129.993,138.685,128.31699999999998,10001.0,129.993,138.685,128.31699999999998,10001.0,129.77700000000002,138.622,128.222,10001.0,145.206,835.031,142.30300000000005,10001.0,133.878,137.631,132.83
output_gpu,VGG16,Fused_Sub_Exp_ReduceSum_split_10565266600396714336_kernel,1001.0,7.484,10.175,7.455,1001.0,7.487999999999999,10.24,7.455,10001.0,2.9130000000000003,6.464,2.879,10001.0,2.9130000000000003,6.464,2.879,10001.0,2.911,6.304,2.879,10001.0,2.647,3.584,2.623,10001.0,4.573000,5.504000,4.511000
output_gpu,VGG16,Fused_BroadcastTo_inplace_assign_builder_4261862423109646487_kernel,1001.0,1.061,3.68,1.024,1001.0,1.065,4.0,1.024,10001.0,1.065,3.872,1.023,10001.0,1.065,3.872,1.023,10001.0,1.068,3.744,1.023,10001.0,1.063,1.6,1.024,10001.0,2.833000,3.520000,2.751000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_16762074751097610432_kernel,1001.0,11.008,21.472,10.72,1001.0,14.128,16.288,13.664,10001.0,14.124,18.399,13.024,10001.0,14.124,18.399,13.024,10001.0,14.14,18.464,13.632,10001.0,10.602,15.328,9.952,10001.0,12.560000,15.072000,12.095000
output_gpu,VGG16,Fused_Transpose_fusion_4356161808770225376_kernel,1001.0,3.2030000000000003,5.984,3.167,1001.0,3.2,3.616,3.167,10001.0,3.198,6.047999999999999,3.136,10001.0,3.198,6.047999999999999,3.136,10001.0,3.2,6.08,3.136,10001.0,3.524,3.904,3.424,10001.0,5.278000,5.792000,5.151000
output_gpu,VGG16,Fused_Transpose_fusion_742818952266800260_kernel,1001.0,22.362,25.663,21.792,1001.0,21.248,27.104,20.864,10001.0,20.793000000000003,27.04,19.584,10001.0,20.793000000000003,27.04,19.584,10001.0,20.093,26.719,18.91200000000001,10001.0,20.828,22.912,18.784,10001.0,22.676000000000002,24.8,21.6
output_gpu,VGG16,Fused_ReduceSum_split_652067717987047060_kernel,1001.0,3.883,7.296,3.839,1001.0,3.889,7.263999999999999,3.84,10001.0,2.57,5.44,2.527,10001.0,2.57,5.44,2.527,10001.0,1.987,4.8,1.951,10001.0,1.987,2.4,1.951,10001.0,3.782000,4.992000,3.711000
output_gpu,VGG16,Fused_Reshape_Transpose_fusion_14746596682837487548_kernel,1001.0,27.957,32.063,27.616,1001.0,20.549,22.175,20.224,10001.0,20.406,24.576,18.495,10001.0,20.406,24.576,18.495,10001.0,20.567,25.087000000000003,20.16,10001.0,12.713,15.136,11.584,10001.0,14.619000,16.736000,13.888000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_8673186849306634597_kernel,1001.0,86.11200000000001,92.351,83.391,1001.0,79.145,82.238,78.495,10001.0,77.438,84.383,75.96700000000001,10001.0,77.438,84.383,75.96700000000001,10001.0,77.586,84.79799999999999,75.935,10001.0,96.419,103.935,93.343,10001.0,101.339,104.542,100.15899999999999
output_gpu,VGG16,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.423,3.4560000000000004,1.4069999999999998,10001.0,1.421,3.648,1.4069999999999998,10001.0,1.421,3.648,1.4069999999999998,10001.0,1.421,3.488,1.407,10001.0,1.432,3.392,1.407,10001.0,3.057000,3.775000,3.007000
output_gpu,VGG16,Fused_BiasAddGrad_Cast_fusion_6320203516961772148_kernel,1001.0,8.982000000000001,14.175999999999998,8.959,1001.0,8.988,13.92,8.927999999999999,10001.0,2.207,5.12,2.143,10001.0,2.207,5.12,2.143,10001.0,2.077,5.152,2.047,10001.0,2.083,5.087,2.047,10001.0,3.807000,4.544000,3.743000
output_gpu,VGG16,Fused_Transpose_fusion_12818412381474925800_kernel,1001.0,4.871,7.776,4.799,1001.0,4.879,5.632000000000001,4.8,10001.0,4.863,7.742999999999999,4.767,10001.0,4.863,7.742999999999999,4.767,10001.0,4.865,7.839,4.768,10001.0,4.914,5.408,4.704,10001.0,7.263000,7.872000,7.072000
output_gpu,VGG16,Fused_Cast_Transpose_fusion_3528709108505838205_kernel,1001.0,2.041,4.992,2.015,1001.0,2.0980000000000003,2.752,2.048,10001.0,2.096,5.152,2.047,10001.0,2.096,5.152,2.047,10001.0,2.096,5.088,2.047,10001.0,1.779,4.576,1.728,10001.0,3.710000,4.576000,3.647000
output_gpu,VGG16,Fused_Cast_split_18407651685387663201_kernel,1001.0,1.233,3.968,1.215,1001.0,1.067,1.568,1.024,10001.0,1.232,4.064,1.215,10001.0,1.232,4.064,1.215,10001.0,1.231,4.192,1.215,10001.0,1.061,1.44,1.024,10001.0,3.171000,3.808000,3.103000
output_gpu,bert_ner,Fused_Reciprocal_split_9715664751193780275_kernel,1001.0,1.418,3.424,1.375,1001.0,1.4169999999999998,1.984,1.376,10001.0,1.42,8.607000000000001,1.376,10001.0,1.42,8.607000000000001,1.376,10001.0,1.417,3.616,1.375,10001.0,1.22,3.968,1.183,10001.0,3.054000,3.744000,3.007000
output_gpu,bert_ner,Fused_Add_Reshape_Reshape_Add_split_14093257215561905173_kernel,1001.0,49.309,53.535,48.192,1001.0,47.425,53.983,46.911,10001.0,47.406000000000006,54.239,46.783,10001.0,47.406000000000006,54.239,46.783,10001.0,47.408,53.75899999999999,46.751000000000005,10001.0,47.690000000000005,54.527,46.975,10001.0,49.534,54.688,48.766999999999996
output_gpu,bert_ner,Fused_Reshape_LessEqual_Sub_LessEqual_LogicalOr_Select_Mul_Maximum_Select_fusion_1537099880519983217_kernel,1001.0,1.808,4.096,1.791,1001.0,1.7519999999999998,2.56,1.727,10001.0,1.806,4.32,1.791,10001.0,1.806,4.32,1.791,10001.0,1.805,4.128,1.791,10001.0,1.806,2.624,1.791,10001.0,3.634000,4.736000,3.583000
output_gpu,bert_ner,Fused_Mul_Mul_ReduceSum_Mul_split_7563720092882377888_kernel,1001.0,6.409,8.736,6.239,1001.0,6.424,7.296,6.272,10001.0,2.676,6.24,2.655,10001.0,2.676,6.24,2.655,10001.0,2.675,6.08,2.655,10001.0,2.675,3.52,2.655,10001.0,4.350000,5.344000,4.287000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_3755334131975342823_kernel,1001.0,1.051,3.968,1.023,1001.0,1.24,1.567,1.215,10001.0,1.239,3.072,1.215,10001.0,1.239,3.072,1.215,10001.0,1.239,3.104,1.215,10001.0,1.239,1.536,1.215,10001.0,2.888000,3.393000,2.816000
output_gpu,bert_ner,Fused_Mul_Mul_ReduceSum_Mul_split_4931910940408955701_kernel,1001.0,6.443,9.12,6.431,1001.0,6.444,9.12,6.431,10001.0,2.495,6.112,2.463,10001.0,2.495,6.112,2.463,10001.0,2.494,5.824,2.463,10001.0,2.495,3.424,2.463,10001.0,4.137000,5.344000,4.063000
output_gpu,bert_ner,Fused_ReduceSum_Mul_split_847463926504720470_kernel,1001.0,18.014,30.528,17.727,1001.0,18.016,29.024,17.727999999999998,10001.0,17.976,28.959,17.184,10001.0,17.976,28.959,17.184,10001.0,18.195,28.128,17.216,10001.0,17.339,903.282,16.927,10001.0,19.939000,29.184,19.424000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_779582814477784037_kernel,1001.0,1.655,4.448,1.631,1001.0,1.654,4.352,1.631,10001.0,1.651,4.704,1.631,10001.0,1.651,4.704,1.631,10001.0,1.652,4.352,1.631,10001.0,1.654,1.984,1.631,10001.0,3.685000,4.225000,3.615000
output_gpu,bert_ner,Fused_Sub_Mul_Mul_split_14426505140340145857_kernel,1001.0,93.999,98.879,93.278,1001.0,91.342,95.166,90.75,10001.0,91.453,97.663,90.718,10001.0,91.453,97.663,90.718,10001.0,91.456,97.47,90.686,10001.0,91.579,773.9399999999999,90.655,10001.0,93.894,97.91900000000001,92.89500000000001
output_gpu,bert_ner,Fused_Mul_Mul_Add_Add_Mul_Mul_ReduceSum_split_7548525193193236377_kernel,1001.0,1325.981,1578.7320000000002,955.38,1001.0,1302.183,2108.344,738.194,10001.0,88.039,95.774,85.759,10001.0,88.039,95.774,85.759,10001.0,88.42899999999999,93.758,86.623,10001.0,88.452,978.129,86.75,10001.0,90.863,94.591,89.183000
output_gpu,bert_ner,Fused_Sub_Exp_ReduceSum_split_10845074449741576939_kernel,1001.0,86.59899999999999,95.935,85.40700000000001,1001.0,86.58,95.23,85.406,10001.0,81.69200000000002,95.71,77.727,10001.0,81.69200000000002,95.71,77.727,10001.0,80.933,95.295,77.726,10001.0,79.68,804.883,77.823,10001.0,85.466,94.208,83.775000
output_gpu,bert_ner,Fused_Cast_Greater_Cast_Sub_Minimum_Mul_Sub_Mul_Add_Mul_Minimum_Cast_Mul_Mul_Add_split_3238740620220587521_kernel,1001.0,1.405,3.392,1.375,1001.0,1.402,1.952,1.375,10001.0,1.403,3.488,1.375,10001.0,1.403,3.488,1.375,10001.0,1.402,3.488,1.375,10001.0,1.402,1.952,1.375,10001.0,3.272000,4.000000,3.199000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_12101948072875812668_kernel,1001.0,1.223,2.944,1.183,1001.0,1.222,1.504,1.1840000000000002,10001.0,1.208,3.04,1.183,10001.0,1.208,3.04,1.183,10001.0,1.21,3.072,1.183,10001.0,1.208,2.976,1.183,10001.0,2.873000,3.456000,2.783000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_3729463149435263926_kernel,1001.0,1.052,3.648,1.023,1001.0,1.057,4.064,1.023,10001.0,1.056,4.0,1.023,10001.0,1.056,4.0,1.023,10001.0,1.052,3.744,1.023,10001.0,1.14,2.656,1.023,10001.0,2.804000,3.264000,2.720000
output_gpu,bert_ner,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_17865465815588720940_kernel,1001.0,18.541,31.104,18.048,1001.0,18.495,27.295,17.984,10001.0,24.277,32.768,21.696,10001.0,24.277,32.768,21.696,10001.0,23.898,31.872,21.791,10001.0,22.065,28.064,21.023,10001.0,25.255000,31.423,24.064000
output_gpu,bert_ner,Fused_Add_fusion_11383007565367662187_kernel,1001.0,1.37,3.36,1.3430000000000002,1001.0,1.369,1.92,1.3430000000000002,10001.0,1.371,8.863999999999999,1.3430000000000002,10001.0,1.371,8.863999999999999,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.37,2.528,1.343,10001.0,3.019000,3.712000,2.943000
output_gpu,bert_ner,Fused_Mul_Mul_ReduceSum_Mul_split_7030311586437121275_kernel,1001.0,6.579,9.024,6.559,1001.0,6.579,9.183,6.559,10001.0,2.464,4.96,2.431,10001.0,2.464,4.96,2.431,10001.0,2.463,4.799,2.431,10001.0,2.463,3.327,2.431,10001.0,4.117000,5.152000,4.031000
output_gpu,bert_ner,Fused_LogicalNot_LogicalAnd_Mul_Select_Assign_fusion_7177650377850726476_kernel,1001.0,1.588,3.808,1.567,1001.0,1.472,2.24,1.439,10001.0,1.6269999999999998,3.808,1.599,10001.0,1.6269999999999998,3.808,1.599,10001.0,1.627,3.808,1.599,10001.0,1.407,2.144,1.375,10001.0,3.231000,4.064000,3.167000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_15466795178772919507_kernel,1001.0,1.141,3.904,1.119,1001.0,1.141,1.408,1.119,10001.0,1.141,4.256,1.119,10001.0,1.141,4.256,1.119,10001.0,1.14,3.904,1.119,10001.0,1.168,9.376,1.119,10001.0,2.909000,3.360000,2.847000
output_gpu,bert_ner,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,3.2,1.183,1001.0,1.226,3.072,1.183,10001.0,1.222,3.2,1.183,10001.0,1.222,3.2,1.183,10001.0,1.222,3.04,1.183,10001.0,1.207,1.536,1.183,10001.0,2.872000,3.520000,2.815000
output_gpu,bert_ner,Fused_Mul_Mul_Add_split_11808595724686856598_kernel,1001.0,36.682,41.471,36.0,1001.0,34.434,40.096,33.855,10001.0,34.181000000000004,40.223,33.375,10001.0,34.181000000000004,40.223,33.375,10001.0,34.284,39.839,33.503,10001.0,32.45,36.767,31.648000000000003,10001.0,36.665000000000006,39.679,35.486999999999995
output_gpu,bert_ner,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_15211520463956997496_kernel,1001.0,1.38,4.063,1.3430000000000002,1001.0,1.378,1.92,1.3430000000000002,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.376,4.128,1.343,10001.0,1.377,1.728,1.343,10001.0,3.359000,3.872000,3.295000
output_gpu,bert_ner,Fused_Transpose_split_6974001358186712233_kernel,1001.0,36.62,41.248000000000005,35.743,1001.0,37.163,39.263000000000005,36.415,10001.0,33.753,40.287000000000006,32.927,10001.0,33.753,40.287000000000006,32.927,10001.0,33.834999999999994,39.903,33.023,10001.0,71.31800000000001,71.967,71.07,10001.0,80.821,81.855,80.447
output_gpu,bert_ner,Fused_ReduceSum_split_16448887507098774487_kernel,1001.0,66.22800000000001,71.616,64.831,1001.0,66.061,71.422,64.767,10001.0,16.59,24.576,16.128,10001.0,16.59,24.576,16.128,10001.0,16.517,24.927,16.031,10001.0,16.282999999999998,22.432,15.808,10001.0,18.520000,24.639999999999997,17.984000
output_gpu,bert_ner,Fused_GeLU_12768812415171470673_kernel,1001.0,130.795,136.894,130.047,1001.0,130.84799999999998,133.95,130.269,10001.0,130.429,136.542,129.75799999999998,10001.0,130.429,136.542,129.75799999999998,10001.0,130.392,136.349,129.726,10001.0,130.656,821.7149999999999,129.758,10001.0,133.00699999999998,137.27900000000002,132.031
output_gpu,bert_ner,Fused_Reshape_ReduceSum_split_601052953231422684_kernel,1001.0,66.211,71.487,64.767,1001.0,64.001,70.943,62.719,10001.0,16.534000000000002,24.671,16.032,10001.0,16.534000000000002,24.671,16.032,10001.0,16.517,25.184,15.968,10001.0,16.521,22.464,15.936,10001.0,18.693000,24.447,17.664000
output_gpu,bert_ner,Fused_Reshape_Transpose_fusion_9854266247006671863_kernel,1001.0,35.49,40.608,34.816,1001.0,36.576,41.119,35.551,10001.0,32.97,39.39100000000001,32.287000000000006,10001.0,32.97,39.39100000000001,32.287000000000006,10001.0,32.992,39.584,32.256,10001.0,71.35900000000001,714.677,71.03800000000001,10001.0,80.716,81.50399999999999,80.383
output_gpu,bert_ner,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.422,1.984,1.4069999999999998,10001.0,1.421,3.4560000000000004,1.4069999999999998,10001.0,1.421,3.4560000000000004,1.4069999999999998,10001.0,1.421,3.488,1.407,10001.0,1.433,1.952,1.407,10001.0,3.053000,3.904000,3.007000
output_gpu,bert_ner,Fused_Add_Log_Mul_Exp_Mul_Mul_Mul_split_13095791597391198535_kernel,1001.0,1.379,4.032,1.3430000000000002,1001.0,1.379,4.096,1.344,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.3769999999999998,4.384,1.3430000000000002,10001.0,1.376,4.096,1.343,10001.0,1.376,4.352,1.343,10001.0,3.115000,3.616000,3.071000
output_gpu,bert_ner,Fused_Mul_Mul_Mul_Add_Mul_Tanh_Add_Mul_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Add_Mul_R_more_split_6057875861280764015_kernel,1001.0,2531.38,3149.6240000000003,1513.773,1001.0,2502.049,3307.297,1792.254,10001.0,211.255,1118.5069999999996,205.692,10001.0,211.255,1118.5069999999996,205.692,10001.0,221.862,232.412,216.476,10001.0,222.215,903.25,216.989,10001.0,225.95399999999998,234.398,220.734000
output_gpu,bert_ner,Fused_Mul_Mul_ReduceSum_Mul_split_10184234684540523326_kernel,1001.0,6.608,9.088,6.591,1001.0,5.67,6.784,5.631,10001.0,2.494,5.055,2.463,10001.0,2.494,5.055,2.463,10001.0,2.492,4.832,2.463,10001.0,2.493,3.392,2.463,10001.0,4.115000,5.184000,3.904000
output_gpu,bert_ner,Fused_Reshape_Cast_Reshape_Sub_Mul_split_8135735155960499556_kernel,1001.0,1.248,3.936,1.215,1001.0,1.21,1.568,1.183,10001.0,1.242,4.256,1.215,10001.0,1.242,4.256,1.215,10001.0,1.243,4.032,1.215,10001.0,1.264,2.272,1.215,10001.0,3.005000,3.488000,2.943000
output_gpu,bert_ner,Fused_Mul_Mul_ReduceSum_Mul_split_6109414240503509605_kernel,1001.0,3.865,6.047999999999999,3.839,1001.0,3.864,4.544,3.839,10001.0,3.863,6.08,3.839,10001.0,3.863,6.08,3.839,10001.0,3.864,8.8,3.839,10001.0,3.862,4.672,3.839,10001.0,5.379000,6.208000,5.311000
output_gpu,bert_ner,Fused_RealDiv_Mul_LessEqual_Cast_Mul_split_1307138256337638741_kernel,1001.0,164.581,170.75,163.454,1001.0,159.364,164.669,158.30100000000002,10001.0,159.86700000000005,166.077,158.493,10001.0,159.86700000000005,166.077,158.493,10001.0,159.86700000000005,166.653,158.55700000000004,10001.0,160.122,917.745,158.39700000000002,10001.0,161.522,164.99,159.934
output_gpu,bert_ner,Fused_ReduceSum_Mul_Mul_Mul_Mul_Mul_Add_Mul_split_7566706435819628490_kernel,1001.0,18.533,29.728,17.984,1001.0,18.523,27.231,17.984,10001.0,24.306,32.352,22.015,10001.0,24.306,32.352,22.015,10001.0,23.745,33.439,21.792,10001.0,22.026,28.287,20.927,10001.0,25.165000,32.768,24.000000
output_gpu,bert_ner,Fused_Mul_Mul_Mul_ReduceSum_split_592974943113998615_kernel,1001.0,126.972,135.806,125.438,1001.0,127.423,132.766,126.078,10001.0,127.351,132.829,125.278,10001.0,127.351,132.829,125.278,10001.0,127.367,136.125,125.662,10001.0,127.576,851.091,125.47,10001.0,128.923,137.72699999999998,127.423000
output_gpu,bert_ner,Fused_Mul_Mul_Add_Mul_LessEqual_Cast_Mul_split_9452475813991627793_kernel,1001.0,68.158,73.663,67.487,1001.0,65.634,69.439,64.863,10001.0,65.74000000000001,72.831,64.926,10001.0,65.74000000000001,72.831,64.926,10001.0,65.72699999999999,72.191,64.831,10001.0,63.74100000000001,67.775,62.751000000000005,10001.0,68.448,72.959,67.36
output_gpu,bert_ner,Fused_Mul_split_9671876206845825795_kernel,1001.0,1.37,3.36,1.3430000000000002,1001.0,1.369,1.984,1.3430000000000002,10001.0,1.369,3.488,1.3430000000000002,10001.0,1.369,3.488,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.368,1.888,1.343,10001.0,3.234000,4.000000,3.167000
output_gpu,bert_ner,Fused_Mul_Add_ReduceMax_split_14375517492105268498_kernel,1001.0,86.25299999999999,94.974,85.15100000000001,1001.0,80.351,87.039,78.655,10001.0,80.60199999999998,712.2429999999998,76.863,10001.0,80.60199999999998,712.2429999999998,76.863,10001.0,79.794,94.91,76.735,10001.0,78.644,696.4050000000001,76.703,10001.0,84.88900000000001,91.967,83.327000
output_gpu,bert_ner,Fused_Reshape_Transpose_fusion_13614983290056184942_kernel,1001.0,35.463,40.575,34.88,1001.0,36.575,39.487,35.231,10001.0,32.991,39.519,32.32,10001.0,32.991,39.519,32.32,10001.0,32.976,39.199,32.319,10001.0,72.26299999999999,849.6510000000001,71.03800000000001,10001.0,80.778,82.4,80.383
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_11320170747834243405_kernel,1001.0,1.058,3.712,1.023,1001.0,1.11,2.784,1.023,10001.0,1.061,3.744,1.023,10001.0,1.061,3.744,1.023,10001.0,1.063,3.744,1.023,10001.0,0.917,1.344,0.895,10001.0,2.840000,3.264000,2.751000
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_8591226450643150054_kernel,1001.0,5.961,10.784,5.8870000000000005,1001.0,5.968999999999999,11.550999999999998,5.8870000000000005,10001.0,5.952000000000001,11.008,5.856,10001.0,5.952000000000001,11.008,5.856,10001.0,5.9510000000000005,10.976,5.856,10001.0,5.95,8.736,5.856,10001.0,7.745,10.687999999999999,7.552
output_gpu,deepfm,Fused_Cast_Mul_split_7158963791301647549_kernel,1001.0,1.406,4.224,1.375,1001.0,1.431,4.448,1.375,10001.0,1.402,4.384,1.375,10001.0,1.402,4.384,1.375,10001.0,1.403,4.224,1.375,10001.0,1.207,1.568,1.183,10001.0,3.184000,3.872000,3.135000
output_gpu,deepfm,Fused_Reshape_Mul_ReduceSum_split_4829910890489653637_kernel,1001.0,28.196,32.96,28.031,1001.0,28.162,33.024,27.999,10001.0,8.575,11.84,8.479,10001.0,8.575,11.84,8.479,10001.0,8.128,11.68,7.872,10001.0,8.131,8.96,7.808,10001.0,10.787000,11.712000,10.176000
output_gpu,deepfm,Fused_ReduceSum_split_10403650091473542781_kernel,1001.0,3.749,5.952000000000001,3.68,1001.0,3.749,4.512,3.68,10001.0,2.297,6.047000000000001,2.271,10001.0,2.297,6.047000000000001,2.271,10001.0,2.297,5.888,2.271,10001.0,2.296,3.456,2.271,10001.0,4.350000,5.856000,4.287000
output_gpu,deepfm,Fused_Cast_Add_Greater_Cast_Mul_split_7909609563230723991_kernel,1001.0,31.38,35.008,30.879,1001.0,27.165,32.0,26.431,10001.0,26.806,31.071,25.184,10001.0,26.806,31.071,25.184,10001.0,26.450000000000006,31.424,25.088,10001.0,25.695,28.095,24.8,10001.0,28.79,31.807000000000002,27.935000000000002
output_gpu,deepfm,Fused_Cast_Add_Greater_Cast_Mul_split_17465771081355149117_kernel,1001.0,60.36,64.319,59.775,1001.0,52.182,53.75899999999999,51.455,10001.0,51.012,56.319,48.639,10001.0,51.012,56.319,48.639,10001.0,50.70800000000001,56.511,48.511,10001.0,50.613,54.528,48.575,10001.0,53.669000000000004,56.095,52.574999999999996
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_8261444986825493822_kernel,1001.0,1.262,3.744,1.247,1001.0,1.268,5.952000000000001,1.247,10001.0,1.26,3.84,1.247,10001.0,1.26,3.84,1.247,10001.0,1.261,3.808,1.247,10001.0,1.086,9.184,1.055,10001.0,2.910000,3.360000,2.847000
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_3960045433526664861_kernel,1001.0,1.053,3.872,1.023,1001.0,1.115,2.24,1.023,10001.0,1.052,3.68,1.023,10001.0,1.052,3.68,1.023,10001.0,1.054,3.68,1.023,10001.0,0.918,1.408,0.864,10001.0,2.817000,3.360000,2.751000
output_gpu,deepfm,Fused_Mul_ReduceSum_split_5786251060074629964_kernel,1001.0,28.191,32.608000000000004,28.031,1001.0,28.195,33.28,27.999,10001.0,8.578,12.576,8.479,10001.0,8.578,12.576,8.479,10001.0,7.816,11.424,6.687,10001.0,7.654,11.232,7.52,10001.0,10.032000,11.360000,9.887000
output_gpu,deepfm,Fused_Cast_Add_split_5058870544143620295_kernel,1001.0,31.144,34.943000000000005,30.688,1001.0,26.827,32.032,26.08,10001.0,26.538,31.135,25.024,10001.0,26.538,31.135,25.024,10001.0,26.655,31.36,25.024,10001.0,25.721,652.054,24.512,10001.0,28.483,31.2,27.648
output_gpu,deepfm,Fused_BroadcastTo_Mul_split_4422170676585195038_kernel,1001.0,5.221,8.095,5.055,1001.0,5.237,8.544,5.056,10001.0,5.214,8.224,5.024,10001.0,5.214,8.224,5.024,10001.0,5.213,8.256,5.023,10001.0,4.473,5.216,4.319,10001.0,7.483000,8.192000,7.296000
output_gpu,deepfm,Fused_Cast_Mul_split_15556904825511396780_kernel,1001.0,27.412,32.736,26.944000000000003,1001.0,27.396,30.528,26.848000000000003,10001.0,27.338,32.832,26.624,10001.0,27.338,32.832,26.624,10001.0,27.204,32.832,26.559,10001.0,25.753,741.045,24.736,10001.0,29.991,33.984,28.575
output_gpu,deepfm,Fused_BroadcastTo_Mul_Mul_Add_Mul_split_10538182896147089217_kernel,1001.0,2.532,5.8870000000000005,2.464,1001.0,2.551,9.504,2.464,10001.0,2.53,9.152,2.463,10001.0,2.53,9.152,2.463,10001.0,2.528,5.856,2.463,10001.0,2.537,5.824,2.464,10001.0,4.288000,5.472000,4.191000
output_gpu,deepfm,Fused_Cast_split_10247996783945338004_kernel,1001.0,2.119,4.992,2.08,1001.0,2.122,5.056,2.079,10001.0,2.113,5.056,2.079,10001.0,2.113,5.056,2.079,10001.0,2.113,5.024,2.079,10001.0,2.116,2.784,2.079,10001.0,3.878000,4.576000,3.807000
output_gpu,deepfm,Fused_Cast_ReduceSum_split_15424511383944843442_kernel,1001.0,310.284,344.092,274.941,1001.0,310.17400000000004,344.12300000000005,274.204,10001.0,39.508,50.239,38.751,10001.0,39.508,50.239,38.751,10001.0,39.319,50.559,38.719,10001.0,39.213,895.3480000000001,38.655,10001.0,41.366,50.463,40.799000
output_gpu,deepfm,Fused_Cast_split_8201333723459195024_kernel,1001.0,3.651,6.5920000000000005,3.584,1001.0,3.636,4.096,3.583,10001.0,3.635,6.527,3.583,10001.0,3.635,6.527,3.583,10001.0,3.635,6.6880000000000015,3.583,10001.0,5.448,5.856,5.12,10001.0,5.854000,6.528000,5.759000
output_gpu,deepfm,Fused_ReduceSum_split_2397920035714093970_kernel,1001.0,3.748,5.984,3.68,1001.0,3.748,4.48,3.679,10001.0,2.155,5.28,2.112,10001.0,2.155,5.28,2.112,10001.0,2.155,5.28,2.112,10001.0,2.155,2.88,2.112,10001.0,4.157000,5.088000,4.095000
output_gpu,deepfm,Fused_Mul_split_12850699330263623486_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.38,9.504,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.423,1.343,10001.0,1.369,1.888,1.343,10001.0,3.037000,3.840000,2.975000
output_gpu,deepfm,Fused_Cast_Add_Greater_Cast_Mul_split_7266988415084394553_kernel,1001.0,16.267,20.736,15.839,1001.0,14.411,19.359,13.984000000000002,10001.0,14.448,19.199,13.984000000000002,10001.0,14.448,19.199,13.984000000000002,10001.0,14.431,19.423,13.952,10001.0,13.582,16.576,13.12,10001.0,16.232000,18.816000,15.616000
output_gpu,deepfm,Fused_Mul_split_15128307268254343677_kernel,1001.0,1.232,3.904,1.215,1001.0,1.232,3.968,1.215,10001.0,1.23,4.128,1.215,10001.0,1.23,4.128,1.215,10001.0,1.23,4.0,1.215,10001.0,1.246,6.239,1.215,10001.0,2.977000,3.520000,2.911000
output_gpu,deepfm,Fused_Mul_Reshape_Add_Cast_Add_Cast_Add_Maximum_Mul_Sub_Abs_Neg_Exp_Add_Log_Add__more_split_11752474693856948763_kernel,1001.0,1.819,4.672,1.791,1001.0,1.816,2.336,1.791,10001.0,1.818,4.768,1.76,10001.0,1.818,4.768,1.76,10001.0,1.819,4.704,1.76,10001.0,1.825,6.752,1.76,10001.0,3.602000,4.192000,3.520000
output_gpu,deepfm,Fused_Cast_split_16126699656595961669_kernel,1001.0,1.421,4.16,1.376,1001.0,1.419,1.7919999999999998,1.4069999999999998,10001.0,1.419,4.288,1.375,10001.0,1.419,4.288,1.375,10001.0,1.419,4.256,1.376,10001.0,1.421,7.68,1.376,10001.0,3.440000,4.000000,3.359000
output_gpu,deepfm,Fused_Mul_split_3672699994263214826_kernel,1001.0,1.237,3.904,1.215,1001.0,1.243,5.152,1.215,10001.0,1.237,4.096,1.215,10001.0,1.237,4.096,1.215,10001.0,1.236,4.0,1.215,10001.0,1.235,10.624,1.215,10001.0,3.165000,3.712000,3.103000
output_gpu,deepfm,Fused_BroadcastTo_Mul_Mul_Add_Mul_split_9125222383637472076_kernel,1001.0,218.104,223.261,217.405,1001.0,218.142,221.021,217.341,10001.0,218.432,223.26,217.276,10001.0,218.432,223.26,217.276,10001.0,218.436,222.78,217.30900000000003,10001.0,218.451,220.861,217.405,10001.0,220.332,223.133,219.326
output_gpu,deepfm,Fused_Cast_Add_split_6834618630944280596_kernel,1001.0,59.88399999999999,63.999,59.423,1001.0,51.615,56.767,50.815,10001.0,49.732,55.96700000000001,48.191,10001.0,49.732,55.96700000000001,48.191,10001.0,49.95200000000001,56.031000000000006,47.71100000000001,10001.0,50.051,766.772,47.935,10001.0,52.725,55.519,51.935
output_gpu,deepfm,Fused_Cast_Mul_split_7051392259342517497_kernel,1001.0,1.436,3.968,1.4069999999999998,1001.0,1.4380000000000002,2.08,1.4069999999999998,10001.0,1.433,4.063,1.4069999999999998,10001.0,1.433,4.063,1.4069999999999998,10001.0,1.433,4.064,1.407,10001.0,1.232,1.472,1.215,10001.0,3.050000,3.488000,3.007000
output_gpu,deepfm,Fused_Mul_split_7731327840381384245_kernel,1001.0,1.226,3.904,1.1840000000000002,1001.0,1.226,4.256,1.1840000000000002,10001.0,1.224,4.032,1.183,10001.0,1.224,4.032,1.183,10001.0,1.224,4.032,1.183,10001.0,1.223,1.6,1.184,10001.0,3.166000,3.777000,3.103000
output_gpu,deepfm,Fused_Cast_ReduceSum_split_18187264574057599824_kernel,1001.0,170.19400000000002,185.822,157.342,1001.0,149.259,169.822,148.31799999999998,10001.0,21.238000000000003,31.743,20.352,10001.0,21.238000000000003,31.743,20.352,10001.0,20.939,31.84,20.415,10001.0,20.666,29.407,20.223,10001.0,22.825000,31.584,22.304000
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.222,3.04,1.183,1001.0,1.223,1.536,1.1840000000000002,10001.0,1.222,3.2,1.183,10001.0,1.222,3.2,1.183,10001.0,1.222,3.2,1.183,10001.0,1.04,1.44,0.992,10001.0,2.892000,4.352000,2.815000
output_gpu,deepfm,Fused_Cast_split_9939745215965046806_kernel,1001.0,26.849,32.191,26.304,1001.0,26.847,32.256,26.24,10001.0,26.724,32.160000000000004,25.759,10001.0,26.724,32.160000000000004,25.759,10001.0,26.549,32.126999999999995,25.728,10001.0,28.671,31.232,26.944,10001.0,28.962999999999997,32.799,28.16
output_gpu,deepfm,Fused_Mul_fusion_9061280092631476395_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.383,8.736,1.3430000000000002,10001.0,1.369,3.392,1.3430000000000002,10001.0,1.369,3.392,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.369,1.888,1.343,10001.0,3.022000,4.640000,2.975000
output_gpu,deepfm,Fused_Cast_Add_split_17520633874024788448_kernel,1001.0,16.176,20.736,15.775,1001.0,14.187,16.736,13.824000000000002,10001.0,14.182,19.072,13.663,10001.0,14.182,19.072,13.663,10001.0,14.195,19.392,13.343,10001.0,14.264,16.607,13.824,10001.0,16.086000,18.879000,15.424000
output_gpu,deepfm,Fused_Mul_BroadcastTo_Mul_Mul_Reshape_BroadcastTo_Neg_Reshape_BroadcastTo_Mul_Mu_more_split_8802705259846963971_kernel,1001.0,2575.732,2678.43,2554.911,1001.0,629.9159999999999,1430.507,626.679,10001.0,628.288,637.139,621.364,10001.0,628.288,637.139,621.364,10001.0,628.284,638.069,621.4920000000002,10001.0,628.662,1344.906,626.6460000000001,10001.0,632.8549999999999,637.434,625.274
output_gpu,deepfm,Fused_Cast_ReduceSum_split_14289495857752519911_kernel,1001.0,51.049,53.983,51.007,1001.0,51.034,51.487,50.975,10001.0,5.672999999999999,8.863999999999999,5.12,10001.0,5.672999999999999,8.863999999999999,5.12,10001.0,4.848,9.792,4.735,10001.0,4.197,5.44,4.096,10001.0,6.807000,7.872000,6.496000
output_gpu,deepfm,Fused_Mul_Reshape_Add_Cast_Add_Cast_Add_Neg_Exp_Add_RealDiv_split_7810111563634315399_kernel,1001.0,1.536,4.512,1.503,1001.0,1.538,1.984,1.503,10001.0,1.535,6.656000000000001,1.503,10001.0,1.535,6.656000000000001,1.503,10001.0,1.534,4.384,1.503,10001.0,1.535,1.984,1.503,10001.0,3.296000,4.224000,3.231000
output_gpu,deepfm,Fused_Mul_Add_Mul_Add_split_13342087699428783147_kernel,1001.0,1.408,3.36,1.375,1001.0,1.413,2.176,1.376,10001.0,1.4069999999999998,3.4560000000000004,1.375,10001.0,1.4069999999999998,3.4560000000000004,1.375,10001.0,1.406,3.488,1.375,10001.0,1.212,7.231,1.183,10001.0,3.029000,3.776000,2.975000
output_gpu,deepfm,Fused_Cast_split_9353551539531787746_kernel,1001.0,1.436,3.968,1.408,1001.0,1.4369999999999998,4.032,1.408,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.434,4.16,1.407,10001.0,1.434,1.728,1.407,10001.0,3.164000,3.616000,3.103000
output_gpu,deepfm,Fused_Mul_fusion_548583029185277258_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.37,1.888,1.3430000000000002,10001.0,1.368,3.584,1.3430000000000002,10001.0,1.368,3.584,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.369,6.72,1.343,10001.0,3.039000,4.288000,2.975000
output_gpu,deepfm,Fused_Cast_Add_split_16080452064302553389_kernel,1001.0,120.162,124.191,119.039,1001.0,98.964,103.871,98.111,10001.0,95.14,103.614,93.31,10001.0,95.14,103.614,93.31,10001.0,95.316,103.55,93.054,10001.0,111.339,866.29,109.054,10001.0,99.81099999999999,101.823,98.495
output_gpu,deepfm,Fused_Cast_ReduceSum_split_4345341268849637758_kernel,1001.0,100.064,102.719,99.902,1001.0,86.10799999999999,87.51899999999999,85.79,10001.0,13.675,19.583,12.64,10001.0,13.675,19.583,12.64,10001.0,12.317,20.064,11.36,10001.0,11.78,17.951999999999998,11.072,10001.0,14.944000,20.096,14.080000
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_5515132648152527640_kernel,1001.0,1.053,3.648,1.023,1001.0,1.057,1.344,1.023,10001.0,1.069,9.6,1.023,10001.0,1.069,9.6,1.023,10001.0,1.056,3.84,1.023,10001.0,0.917,1.376,0.895,10001.0,2.816000,3.360000,2.751000
output_gpu,deepfm,Fused_Cast_Add_Greater_Cast_Mul_split_5351850128657482433_kernel,1001.0,121.368,125.822,120.575,1001.0,100.164,102.239,99.359,10001.0,95.979,104.093,94.111,10001.0,95.979,104.093,94.111,10001.0,96.03,105.118,93.758,10001.0,110.648,114.686,109.022,10001.0,101.243,106.879,99.839
output_gpu,deepfm,Fused_Mul_split_18423090061797319888_kernel,1001.0,1.43,3.968,1.4069999999999998,1001.0,1.4280000000000002,1.664,1.4069999999999998,10001.0,1.43,9.504,1.4069999999999998,10001.0,1.43,9.504,1.4069999999999998,10001.0,1.432,9.6,1.407,10001.0,1.428,1.664,1.407,10001.0,3.061000,3.456000,3.007000
output_gpu,deepfm,Fused_Cast_Mul_split_9055599974624446663_kernel,1001.0,2.076,4.864,2.047,1001.0,2.075,2.944,2.047,10001.0,2.073,4.928,2.047,10001.0,2.073,4.928,2.047,10001.0,2.073,4.928,2.047,10001.0,1.785,2.208,1.759,10001.0,3.844000,4.480000,3.775000
output_gpu,deepfm,Fused_Mul_ReduceSum_split_10063155855610842188_kernel,1001.0,20.158,22.367,19.871,1001.0,17.212,18.528,16.991,10001.0,2.625,6.112,2.56,10001.0,2.625,6.112,2.56,10001.0,2.624,9.664,2.56,10001.0,2.634,5.984,2.56,10001.0,4.692000,5.791000,4.607000
output_gpu,deepfm,Fused_BroadcastTo_inplace_assign_builder_10812234467967346125_kernel,1001.0,1.104,3.744,1.087,1001.0,1.112,1.6,1.087,10001.0,1.106,3.936,1.087,10001.0,1.106,3.936,1.087,10001.0,1.104,3.84,1.056,10001.0,0.956,1.856,0.927,10001.0,2.911000,3.392000,2.847000
output_gpu,deepfm,Fused_Cast_Mul_split_10405421054794923061_kernel,1001.0,3.574,6.496,3.519,1001.0,3.573,3.968,3.519,10001.0,3.5580000000000003,6.624,3.488,10001.0,3.5580000000000003,6.624,3.488,10001.0,3.56,6.592,3.488,10001.0,2.894,3.424,2.688,10001.0,5.292000,6.080000,5.215000
output_gpu,mobilenetv2,Fused_Mul_split_1342664299784032915_kernel,1001.0,1.372,3.392,1.3430000000000002,1001.0,1.369,1.984,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.178,1.664,1.151,10001.0,3.031000,3.840000,2.943000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_1464885464384369972_kernel,1001.0,3.698,6.816,3.647,1001.0,3.699,4.352,3.647,10001.0,3.646,9.44,3.615,10001.0,3.646,9.44,3.615,10001.0,3.645,7.104,3.615,10001.0,3.655,4.544,3.615,10001.0,5.816000,7.103000,5.727000
output_gpu,mobilenetv2,Fused_Mul_fusion_16645970179795583297_kernel,1001.0,162.524,168.958,160.734,1001.0,144.97299999999998,148.765,144.221,10001.0,144.85600000000005,152.062,143.90200000000004,10001.0,144.85600000000005,152.062,143.90200000000004,10001.0,144.832,151.805,143.90200000000004,10001.0,140.095,144.798,139.294,10001.0,147.783,152.511,146.686
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_13857445620540168930_kernel,1001.0,1.385,4.096,1.344,1001.0,1.385,1.92,1.344,10001.0,1.385,4.768,1.344,10001.0,1.385,4.768,1.344,10001.0,1.384,4.224,1.343,10001.0,1.384,1.76,1.344,10001.0,3.404000,3.968000,3.327000
output_gpu,mobilenetv2,Fused_Mul_fusion_13164648357494835259_kernel,1001.0,54.483,59.999,53.919,1001.0,54.402,57.504,53.727,10001.0,54.423,60.99100000000001,53.695,10001.0,54.423,60.99100000000001,53.695,10001.0,54.409,60.511,53.631,10001.0,54.355,58.08,53.504000000000005,10001.0,56.599999999999994,60.448,55.839
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_1883640624310466778_kernel,1001.0,5.426,8.192,5.375,1001.0,5.456,8.512,5.407,10001.0,5.356,8.48,5.311,10001.0,5.356,8.48,5.311,10001.0,5.359,8.224,5.311,10001.0,5.351,5.792,5.28,10001.0,7.589000,8.192000,7.519000
output_gpu,mobilenetv2,Fused_RealDiv_Mul_Add_fusion_16910873801011469393_kernel,1001.0,76.237,81.535,75.583,1001.0,76.237,81.727,75.455,10001.0,76.09,82.04599999999998,75.19800000000002,10001.0,76.09,82.04599999999998,75.19800000000002,10001.0,76.10600000000001,83.07000000000002,75.23100000000002,10001.0,76.125,795.125,75.199,10001.0,78.829,82.719,77.695
output_gpu,mobilenetv2,Fused_Mul_ReduceSum_Add_split_64190903711288141_kernel,1001.0,5.494,8.576,5.439,1001.0,5.495,8.544,5.439,10001.0,5.412999999999999,8.767999999999999,5.343999999999999,10001.0,5.412999999999999,8.767999999999999,5.343999999999999,10001.0,5.414,8.319,5.344,10001.0,5.409,9.504,5.344,10001.0,7.658000,8.320000,7.583000
output_gpu,mobilenetv2,Fused_RealDiv_fusion_13076568714319119018_kernel,1001.0,73.122,78.975,72.607,1001.0,72.918,76.446,72.383,10001.0,72.93299999999998,79.551,72.318,10001.0,72.93299999999998,79.551,72.318,10001.0,72.984,79.327,72.286,10001.0,70.049,929.428,69.119,10001.0,75.50800000000001,79.167,74.55799999999999
output_gpu,mobilenetv2,Fused_RealDiv_Mul_Add_fusion_34820967813013138_kernel,1001.0,212.747,218.461,211.261,1001.0,219.327,225.564,218.268,10001.0,219.425,225.628,218.044,10001.0,219.425,225.628,218.044,10001.0,219.408,225.98,218.076,10001.0,219.495,1005.363,218.237,10001.0,221.149,225.374,219.87
output_gpu,mobilenetv2,Fused_Mul_fusion_7572473347176879639_kernel,1001.0,76.979,82.655,76.383,1001.0,76.84100000000001,79.87100000000001,76.158,10001.0,76.88600000000001,82.97399999999999,75.99799999999998,10001.0,76.88600000000001,82.97399999999999,75.99799999999998,10001.0,76.874,82.655,76.15899999999999,10001.0,76.907,803.765,76.127,10001.0,78.994,82.911,78.143
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_3922645650335133295_kernel,1001.0,1.239,3.936,1.215,1001.0,1.236,1.6,1.215,10001.0,1.236,4.384,1.215,10001.0,1.236,4.384,1.215,10001.0,1.236,4.031,1.215,10001.0,1.237,4.32,1.215,10001.0,3.192000,3.967000,3.135000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_14370067099037594156_kernel,1001.0,1.261,3.968,1.247,1001.0,1.259,1.856,1.247,10001.0,1.259,4.288,1.247,10001.0,1.259,4.288,1.247,10001.0,1.259,4.032,1.247,10001.0,1.273,3.648,1.247,10001.0,3.208000,3.776000,3.167000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_16904778296983262158_kernel,1001.0,2.273,5.343999999999999,2.239,1001.0,2.269,2.944,2.239,10001.0,2.2230000000000003,3.008,2.207,10001.0,2.2230000000000003,3.008,2.207,10001.0,2.22,5.536,2.176,10001.0,2.224,11.2,2.175,10001.0,4.215000,5.280000,4.127000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_16029577570421196396_kernel,1001.0,1.675,4.512,1.632,1001.0,1.67,2.08,1.631,10001.0,1.67,4.736000000000001,1.631,10001.0,1.67,4.736000000000001,1.631,10001.0,1.67,4.48,1.631,10001.0,1.672,3.008,1.631,10001.0,3.680000,4.256000,3.615000
output_gpu,mobilenetv2,Fused_Add_fusion_10034880499458677202_kernel,1001.0,19.127,25.184,18.56,1001.0,19.114,25.152,18.624,10001.0,19.092,25.568,18.464,10001.0,19.092,25.568,18.464,10001.0,19.082,25.056,18.399,10001.0,18.663,25.183,18.112,10001.0,21.31,25.024,20.224
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_3555465180584210656_kernel,1001.0,1.516,4.352,1.472,1001.0,1.514,1.92,1.472,10001.0,1.513,4.736000000000001,1.471,10001.0,1.513,4.736000000000001,1.471,10001.0,1.512,4.384,1.471,10001.0,1.515,1.888,1.472,10001.0,3.523000,4.192000,3.455000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_10977476055007985136_kernel,1001.0,1.24,3.936,1.215,1001.0,1.238,1.632,1.215,10001.0,1.238,4.256,1.215,10001.0,1.238,4.256,1.215,10001.0,1.238,4.096,1.215,10001.0,1.239,1.6,1.215,10001.0,3.174000,3.712000,3.103000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_15687251160003611935_kernel,1001.0,2.8080000000000003,5.856,2.752,1001.0,2.806,3.36,2.783,10001.0,2.752,6.303999999999999,2.719,10001.0,2.752,6.303999999999999,2.719,10001.0,2.753,5.92,2.719,10001.0,2.751,3.456,2.719,10001.0,4.855000,5.728000,4.767000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_7765724516682757000_kernel,1001.0,1.265,3.936,1.247,1001.0,1.266,4.192,1.247,10001.0,1.265,4.256,1.247,10001.0,1.265,4.256,1.247,10001.0,1.263,4.032,1.247,10001.0,1.093,1.568,1.055,10001.0,3.243000,4.064000,3.167000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_10225681946839001081_kernel,1001.0,3.633,7.007999999999999,3.52,1001.0,3.633,4.64,3.552,10001.0,3.628,7.231,3.52,10001.0,3.628,7.231,3.52,10001.0,3.628,6.816,3.519,10001.0,2.831,3.679,2.72,10001.0,5.877000,6.688000,5.728000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_8837079016072649350_kernel,1001.0,2.004,4.9910000000000005,1.952,1001.0,2.002,2.592,1.951,10001.0,2.0,5.28,1.951,10001.0,2.0,5.28,1.951,10001.0,2.002,4.96,1.952,10001.0,1.763,2.272,1.727,10001.0,4.110000,4.768000,4.032000
output_gpu,mobilenetv2,Fused_Reshape_Tile_RealDiv_fusion_17607579482584388844_kernel,1001.0,69.183,74.207,68.639,1001.0,70.982,72.094,70.079,10001.0,65.79400000000001,72.351,62.975,10001.0,65.79400000000001,72.351,62.975,10001.0,66.25200000000001,73.95,63.07100000000001,10001.0,66.072,556.057,63.071000000000005,10001.0,71.453,73.247,69.92
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_7119881761744762288_kernel,1001.0,20.677,26.303,20.096,1001.0,20.728,24.831,20.191,10001.0,20.636000000000006,27.008000000000003,19.936,10001.0,20.636000000000006,27.008000000000003,19.936,10001.0,20.668000000000003,26.591,19.999,10001.0,20.161,23.775,19.488,10001.0,23.004,26.912,21.663999999999998
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_8187148596769453932_kernel,1001.0,1.277,4.0,1.247,1001.0,1.276,4.256,1.247,10001.0,1.273,4.351,1.247,10001.0,1.273,4.351,1.247,10001.0,1.275,4.191,1.247,10001.0,1.1,1.44,1.087,10001.0,3.232000,3.776000,3.167000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_11392482609354099158_kernel,1001.0,2.424,5.5360000000000005,2.399,1001.0,2.42,3.136,2.399,10001.0,2.382,5.888,2.336,10001.0,2.382,5.888,2.336,10001.0,2.38,5.696,2.336,10001.0,2.383,3.232,2.336,10001.0,4.406000,5.407000,4.351000
output_gpu,mobilenetv2,Fused_RealDiv_fusion_14514309178616297254_kernel,1001.0,144.481,150.30200000000002,143.74200000000002,1001.0,144.504,150.782,143.90200000000002,10001.0,144.315,150.781,143.613,10001.0,144.315,150.781,143.613,10001.0,144.308,150.429,143.613,10001.0,138.74900000000002,842.741,137.69400000000002,10001.0,146.655,150.46200000000002,145.91899999999998
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_15748525942428279523_kernel,1001.0,1.342,4.096,1.311,1001.0,1.34,1.76,1.311,10001.0,1.339,4.448,1.311,10001.0,1.339,4.448,1.311,10001.0,1.338,4.192,1.311,10001.0,1.339,1.92,1.311,10001.0,3.319000,4.128000,3.232000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_9425642578143992248_kernel,1001.0,1.689,4.48,1.663,1001.0,1.686,2.144,1.663,10001.0,1.6840000000000002,4.831,1.663,10001.0,1.6840000000000002,4.831,1.663,10001.0,1.685,4.544,1.663,10001.0,1.686,2.176,1.663,10001.0,3.703000,4.384000,3.647000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_15051326277384382012_kernel,1001.0,2.006,4.896,1.952,1001.0,2.002,2.592,1.952,10001.0,2.0,5.216,1.952,10001.0,2.0,5.216,1.952,10001.0,2.0,4.992,1.951,10001.0,2.053,2.624,2.015,10001.0,4.104000,4.864000,4.031000
output_gpu,mobilenetv2,Fused_ReduceMean_4445296748968516267_kernel,1001.0,67.222,80.223,66.592,1001.0,67.225,80.607,66.623,10001.0,67.453,79.582,66.303,10001.0,67.453,79.582,66.303,10001.0,67.57000000000001,79.743,66.303,10001.0,66.701,74.719,66.335,10001.0,70.05699999999999,78.943,69.247000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_12889760271667858406_kernel,1001.0,2.323,5.44,2.303,1001.0,2.321,3.072,2.303,10001.0,2.275,5.824,2.239,10001.0,2.275,5.824,2.239,10001.0,2.274,5.536,2.239,10001.0,2.272,3.168,2.239,10001.0,4.271000,5.504000,4.223000
output_gpu,mobilenetv2,Fused_Add_fusion_17578593068380632040_kernel,1001.0,165.733,171.294,164.99,1001.0,165.782,171.389,165.021,10001.0,165.769,170.972,164.89200000000002,10001.0,165.769,170.972,164.89200000000002,10001.0,165.775,170.653,164.957,10001.0,161.28400000000002,887.124,160.446,10001.0,167.52800000000002,171.07,166.655
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_17807042815232997142_kernel,1001.0,1.342,4.096,1.311,1001.0,1.342,1.984,1.311,10001.0,1.339,4.512,1.311,10001.0,1.339,4.512,1.311,10001.0,1.339,4.288,1.311,10001.0,1.339,1.76,1.311,10001.0,3.316000,4.032000,3.263000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_2000949605676586887_kernel,1001.0,1.242,3.936,1.215,1001.0,1.243,3.968,1.215,10001.0,1.24,4.288,1.215,10001.0,1.24,4.288,1.215,10001.0,1.241,9.183,1.215,10001.0,1.242,8.32,1.215,10001.0,3.182000,3.712000,3.104000
output_gpu,mobilenetv2,Fused_Mul_fusion_13733023487931009039_kernel,1001.0,99.269,105.758,98.527,1001.0,87.759,91.679,87.07,10001.0,87.75299999999999,94.59,86.94200000000002,10001.0,87.75299999999999,94.59,86.94200000000002,10001.0,87.684,94.399,86.91,10001.0,84.823,88.639,84.063,10001.0,90.343,94.847,89.43900000000001
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_14093403272156553339_kernel,1001.0,1.242,3.936,1.215,1001.0,1.24,1.696,1.215,10001.0,1.241,4.256,1.215,10001.0,1.241,4.256,1.215,10001.0,1.24,4.064,1.215,10001.0,1.24,2.112,1.215,10001.0,3.180000,3.744000,3.103000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_2267131064926800831_kernel,1001.0,1.341,4.096,1.311,1001.0,1.3430000000000002,4.384,1.311,10001.0,1.339,4.416,1.311,10001.0,1.339,4.416,1.311,10001.0,1.339,4.192,1.311,10001.0,1.34,4.48,1.311,10001.0,3.326000,3.936000,3.263000
output_gpu,mobilenetv2,Fused_Add_fusion_2557389588245675257_kernel,1001.0,30.44,35.999,29.792,1001.0,30.441,34.111,29.855,10001.0,30.35,36.895,29.472,10001.0,30.35,36.895,29.472,10001.0,30.371,36.48,29.599,10001.0,29.17,682.711,28.511000000000003,10001.0,32.247,35.391,31.264
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_13258278058738367338_kernel,1001.0,2.81,6.08,2.72,1001.0,2.819,8.448,2.72,10001.0,2.805,6.688,2.719,10001.0,2.805,6.688,2.719,10001.0,2.81,6.048,2.719,10001.0,2.584,3.296,2.527,10001.0,5.029000,6.112000,4.895000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_1942613708083766739_kernel,1001.0,1.386,4.128,1.344,1001.0,1.39,4.224,1.375,10001.0,1.384,4.512,1.344,10001.0,1.384,4.512,1.344,10001.0,1.383,4.64,1.343,10001.0,1.384,1.76,1.344,10001.0,3.416000,3.969000,3.359000
output_gpu,mobilenetv2,Fused_Add_fusion_15467650381922100690_kernel,1001.0,43.3,48.767,42.687,1001.0,43.299,47.135,42.656000000000006,10001.0,43.2,49.023,42.464,10001.0,43.2,49.023,42.464,10001.0,43.21,48.703,42.431,10001.0,41.856,45.535,41.247,10001.0,45.394,48.927,44.511
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_13883274358138881903_kernel,1001.0,2.9410000000000003,6.047999999999999,2.879,1001.0,2.938,3.616,2.88,10001.0,2.926,6.528,2.88,10001.0,2.926,6.528,2.88,10001.0,2.925,6.272,2.879,10001.0,2.926,9.536,2.879,10001.0,5.026000,6.048000,4.959000
output_gpu,mobilenetv2,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.224,2.9760000000000004,1.183,1001.0,1.222,1.536,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.222,3.04,1.183,10001.0,1.209,1.6,1.183,10001.0,3.068000,3.648000,3.007000
output_gpu,mobilenetv2,Fused_RealDiv_fusion_9044847223339210436_kernel,1001.0,87.54799999999999,93.727,86.943,1001.0,87.55,91.039,86.87899999999999,10001.0,87.36200000000001,93.662,86.68599999999998,10001.0,87.36200000000001,93.662,86.68599999999998,10001.0,87.363,93.534,86.655,10001.0,84.052,943.955,83.134,10001.0,89.53899999999999,93.599,88.83099999999999
output_gpu,mobilenetv2,Fused_Mul_fusion_15419329260743817884_kernel,1001.0,98.475,104.959,96.991,1001.0,88.25299999999999,91.646,87.583,10001.0,87.87299999999999,94.526,87.03800000000003,10001.0,87.87299999999999,94.526,87.03800000000003,10001.0,87.792,94.399,87.039,10001.0,85.096,858.741,84.15899999999999,10001.0,90.24,94.207,89.311
output_gpu,mobilenetv2,Fused_BroadcastTo_inplace_assign_builder_4261862423109646487_kernel,1001.0,1.063,3.808,1.023,1001.0,1.063,1.472,1.024,10001.0,1.065,4.0,1.023,10001.0,1.065,4.0,1.023,10001.0,1.061,3.776,1.023,10001.0,1.065,1.504,1.024,10001.0,3.009000,3.712000,2.943000
output_gpu,mobilenetv2,Fused_Reshape_Sub_Exp_ReduceSum_split_4086276757985005905_kernel,1001.0,7.596,10.496,7.519,1001.0,7.587000000000001,8.256,7.519,10001.0,3.111,7.167999999999999,3.071,10001.0,3.111,7.167999999999999,3.071,10001.0,3.111,6.72,3.071,10001.0,2.668,3.808,2.655,10001.0,4.806000,5.888000,4.735000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_9776070476940196447_kernel,1001.0,1.279,4.128,1.247,1001.0,1.277,1.664,1.247,10001.0,1.276,4.511,1.247,10001.0,1.276,4.511,1.247,10001.0,1.273,4.096,1.247,10001.0,1.274,1.824,1.247,10001.0,3.245000,3.968000,3.167000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_9747693032472106124_kernel,1001.0,1.3219999999999998,4.032,1.28,1001.0,1.3219999999999998,1.664,1.28,10001.0,1.321,9.056,1.279,10001.0,1.321,9.056,1.279,10001.0,1.32,4.128,1.279,10001.0,1.357,2.24,1.279,10001.0,3.316000,3.968000,3.263000
output_gpu,mobilenetv2,Fused_ReduceSum_split_4739669179109504223_kernel,1001.0,5.464,8.416,5.408,1001.0,5.472,9.472,5.408,10001.0,2.29,5.408,2.24,10001.0,2.29,5.408,2.24,10001.0,1.921,4.863,1.887,10001.0,1.653,2.112,1.631,10001.0,3.647000,4.288000,3.583000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_15291847292967690653_kernel,1001.0,1.276,4.032,1.247,1001.0,1.2819999999999998,10.24,1.247,10001.0,1.273,1.7280000000000002,1.247,10001.0,1.273,1.7280000000000002,1.247,10001.0,1.273,4.288,1.247,10001.0,1.273,1.664,1.247,10001.0,3.245000,4.896000,3.168000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_9161410357860483328_kernel,1001.0,1.272,4.128,1.247,1001.0,1.269,1.632,1.247,10001.0,1.269,4.351,1.247,10001.0,1.269,4.351,1.247,10001.0,1.27,4.095,1.247,10001.0,1.098,1.472,1.055,10001.0,3.245000,3.808000,3.167000
output_gpu,mobilenetv2,Fused_Mul_fusion_10004199505558456955_kernel,1001.0,157.721,164.509,155.998,1001.0,145.53,152.51,144.829,10001.0,144.96800000000005,152.317,143.965,10001.0,144.96800000000005,152.317,143.965,10001.0,144.997,151.64600000000004,143.965,10001.0,140.29500000000002,927.315,139.326,10001.0,147.62,151.93499999999997,146.623
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_2526128329842793602_kernel,1001.0,1.269,4.096,1.247,1001.0,1.27,4.256,1.247,10001.0,1.267,4.48,1.247,10001.0,1.267,4.48,1.247,10001.0,1.267,4.096,1.247,10001.0,1.267,4.288,1.247,10001.0,3.219000,3.808000,3.167000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_4177582663050197895_kernel,1001.0,2.591,5.76,2.559,1001.0,2.599,9.696,2.559,10001.0,2.5380000000000003,6.047999999999999,2.495,10001.0,2.5380000000000003,6.047999999999999,2.495,10001.0,2.539,5.952,2.495,10001.0,2.54,3.456,2.496,10001.0,4.554000,5.632000,4.480000
output_gpu,mobilenetv2,Fused_Sub_Exp_ReduceSum_split_17146440480583811922_kernel,1001.0,7.667999999999999,10.592,7.551,1001.0,7.681,10.592,7.552,10001.0,3.1260000000000003,7.007999999999999,3.071,10001.0,3.1260000000000003,7.007999999999999,3.071,10001.0,3.13,6.72,3.072,10001.0,2.677,3.84,2.655,10001.0,4.802000,5.825000,4.735000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_5996352702169476771_kernel,1001.0,1.272,3.968,1.247,1001.0,1.269,1.631,1.247,10001.0,1.27,4.32,1.247,10001.0,1.27,4.32,1.247,10001.0,1.27,4.064,1.247,10001.0,1.27,4.64,1.247,10001.0,3.221000,3.744000,3.167000
output_gpu,mobilenetv2,Fused_ReduceMean_728894389712972095_kernel,1001.0,41.79,53.92,41.247,1001.0,41.786,51.967,41.216,10001.0,41.894,53.823,40.96,10001.0,41.894,53.823,40.96,10001.0,42.105,53.375,40.927,10001.0,41.291,50.336,40.895,10001.0,44.872,54.718999999999994,43.744000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_6172046689355442704_kernel,1001.0,1.969,4.864,1.951,1001.0,1.966,2.656,1.951,10001.0,1.965,5.152,1.919,10001.0,1.965,5.152,1.919,10001.0,1.965,4.928,1.919,10001.0,1.973,5.536,1.951,10001.0,4.051000,4.736000,3.968000
output_gpu,mobilenetv2,Fused_RealDiv_Mul_Add_fusion_12809191724530098095_kernel,1001.0,298.168,303.836,297.052,1001.0,306.043,310.267,304.827,10001.0,306.245,312.826,304.826,10001.0,306.245,312.826,304.826,10001.0,306.243,312.891,305.019,10001.0,306.589,1056.594,304.924,10001.0,308.044,312.574,306.653
output_gpu,mobilenetv2,Fused_Mul_Add_split_8290303533966739766_kernel,1001.0,1.239,4.064,1.215,1001.0,1.238,2.144,1.215,10001.0,1.238,9.408,1.215,10001.0,1.238,9.408,1.215,10001.0,1.239,4.192,1.215,10001.0,1.242,12.511,1.215,10001.0,3.192000,4.032000,3.135000
output_gpu,mobilenetv2,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.422,1.952,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.407,10001.0,1.231,1.76,1.215,10001.0,3.285000,3.968000,3.200000
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_12553909585863355692_kernel,1001.0,1.675,4.5760000000000005,1.632,1001.0,1.6769999999999998,4.672,1.631,10001.0,1.67,4.735,1.631,10001.0,1.67,4.735,1.631,10001.0,1.67,4.512,1.631,10001.0,1.677,5.472,1.632,10001.0,3.665000,4.256000,3.615000
output_gpu,mobilenetv2,Fused_Mul_fusion_11667805904163872099_kernel,1001.0,151.4,156.894,150.654,1001.0,157.52100000000002,164.509,155.645,10001.0,158.31300000000005,163.965,155.70899999999995,10001.0,158.31300000000005,163.965,155.70899999999995,10001.0,158.303,163.22899999999998,155.453,10001.0,158.53300000000002,870.1,155.96599999999998,10001.0,159.816,164.99,157.758
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_7607684200719016090_kernel,1001.0,1.265,3.935,1.247,1001.0,1.267,4.192,1.247,10001.0,1.09,1.44,1.055,10001.0,1.09,1.44,1.055,10001.0,1.263,4.032,1.247,10001.0,1.091,1.44,1.055,10001.0,3.228000,3.808000,3.167000
output_gpu,mobilenetv2,Fused_RealDiv_Mul_Add_fusion_8732084563362980160_kernel,1001.0,108.423,114.302,107.359,1001.0,108.225,111.134,107.23,10001.0,108.256,114.43,106.942,10001.0,108.256,114.43,106.942,10001.0,108.252,114.494,107.006,10001.0,108.35,792.501,107.07,10001.0,110.31299999999999,113.27900000000001,108.991
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_17045368157683029199_kernel,1001.0,1.3019999999999998,4.0,1.279,1001.0,1.3,1.664,1.279,10001.0,1.304,4.544,1.279,10001.0,1.304,4.544,1.279,10001.0,1.3,4.096,1.279,10001.0,1.301,7.36,1.279,10001.0,3.272000,3.840000,3.200000
output_gpu,mobilenetv2,Fused_Mul_fusion_16831524680947881506_kernel,1001.0,211.146,217.758,210.27,1001.0,220.635,224.604,218.812,10001.0,220.525,225.33900000000003,217.24400000000003,10001.0,220.525,225.33900000000003,217.24400000000003,10001.0,220.584,225.212,217.212,10001.0,220.565,912.915,217.245,10001.0,221.771,226.62199999999999,219.67800000000003
output_gpu,mobilenetv2,Fused_Reshape_Add_split_8984716591323419489_kernel,1001.0,2.789,5.696000000000001,2.719,1001.0,2.421,5.44,2.3040000000000003,10001.0,2.404,5.568,2.272,10001.0,2.404,5.568,2.272,10001.0,2.409,5.376,2.272,10001.0,1.925,2.592,1.856,10001.0,4.223000,4.736000,4.096000
output_gpu,mobilenetv2,Fused_Mul_BiasAddGrad_Add_fusion_16718397833147263956_kernel,1001.0,4.503,7.423999999999999,4.448,1001.0,4.497,4.96,4.447,10001.0,4.47,7.616,4.415,10001.0,4.47,7.616,4.415,10001.0,4.455,8.287,4.384,10001.0,4.467,4.992,4.415,10001.0,6.661000,7.360000,6.528000
output_gpu,mobilenetv2,Fused_Add_fusion_8671364787500390138_kernel,1001.0,56.69,62.591,55.967,1001.0,56.692,59.999,55.999,10001.0,56.529,62.207,55.742,10001.0,56.529,62.207,55.742,10001.0,56.547,62.526,55.711000000000006,10001.0,55.282,978.963,54.496,10001.0,58.991,61.695,57.984
output_gpu,mobilenetv2,Fused_Mul_Add_fusion_4314068573008571148_kernel,1001.0,1.265,3.967,1.247,1001.0,1.273,5.376,1.247,10001.0,1.265,4.32,1.247,10001.0,1.265,4.32,1.247,10001.0,1.264,4.032,1.247,10001.0,1.266,4.032,1.247,10001.0,3.234000,3.776000,3.167000
output_gpu,warpctc,Fused_Sqrt_split_3299707972517484487_kernel,1001.0,1.395,3.392,1.375,1001.0,1.394,1.919,1.375,10001.0,1.394,3.4560000000000004,1.375,10001.0,1.394,3.4560000000000004,1.375,10001.0,1.394,3.456,1.375,10001.0,1.394,2.016,1.375,10001.0,3.247000,4.000000,3.199000
output_gpu,warpctc,Fused_Mul_ReduceSum_split_9487618319804607812_kernel,1001.0,1.849,4.032,1.823,1001.0,1.847,2.432,1.823,10001.0,1.847,3.904,1.823,10001.0,1.847,3.904,1.823,10001.0,1.847,3.968,1.823,10001.0,1.847,2.368,1.823,10001.0,3.731000,4.480000,3.679000
output_gpu,warpctc,Fused_Mul_RealDiv_split_10034703891217106231_kernel,1001.0,1.509,3.488,1.472,1001.0,1.507,2.048,1.471,10001.0,1.507,3.552,1.471,10001.0,1.507,3.552,1.471,10001.0,1.507,3.584,1.471,10001.0,1.507,2.048,1.471,10001.0,3.376000,6.240000,3.295000
output_gpu,warpctc,Fused_Reshape_Mul_split_6796268975183852539_kernel,1001.0,1.993,4.864,1.92,1001.0,2.339,5.247999999999999,2.303,10001.0,2.335,5.28,2.303,10001.0,2.335,5.28,2.303,10001.0,2.334,5.152,2.303,10001.0,2.334,2.783,2.303,10001.0,4.433000,5.087000,4.352000
output_gpu,warpctc,Fused_Transpose_split_2001239575898873075_kernel,1001.0,47.049,51.104,45.919,1001.0,48.081,52.287,47.039,10001.0,44.622,53.75899999999999,41.791,10001.0,44.622,53.75899999999999,41.791,10001.0,45.065,53.535,41.855,10001.0,39.025,44.863,34.72,10001.0,43.56,49.503,41.215
output_gpu,warpctc,Fused_Reshape_Mul_ReduceSum_split_14582668912295876395_kernel,1001.0,2.257,4.672,2.239,1001.0,2.255,3.232,2.239,10001.0,2.119,5.28,2.079,10001.0,2.119,5.28,2.079,10001.0,2.12,5.248,2.08,10001.0,2.119,2.912,2.079,10001.0,4.169000,5.120000,4.095000
output_gpu,warpctc,Fused_Mul_ReduceSum_split_1981296571397398758_kernel,1001.0,2.692,4.96,2.655,1001.0,2.6910000000000003,3.488,2.655,10001.0,2.147,5.247000000000001,2.111,10001.0,2.147,5.247000000000001,2.111,10001.0,2.147,5.28,2.111,10001.0,2.146,2.88,2.048,10001.0,4.145000,5.088000,4.095000
output_gpu,warpctc,Fused_ReduceSum_split_17158479222523381165_kernel,1001.0,10.063,13.088,9.984,1001.0,10.071,13.152,9.983,10001.0,2.014,5.376,1.983,10001.0,2.014,5.376,1.983,10001.0,2.015,5.472,1.983,10001.0,2.016,3.008,1.983,10001.0,4.057000,4.960000,3.999000
output_gpu,warpctc,Fused_Mul_ReduceSum_Sqrt_Reshape_Greater_Reshape_Select_split_9040850360720684697_kernel,1001.0,1.453,3.424,1.408,1001.0,1.453,3.424,1.408,10001.0,1.4509999999999998,3.4560000000000004,1.4069999999999998,10001.0,1.4509999999999998,3.4560000000000004,1.4069999999999998,10001.0,1.451,3.519,1.407,10001.0,1.451,1.952,1.408,10001.0,3.334000,4.032000,3.263000
output_gpu,warpctc,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.1840000000000002,1001.0,1.227,6.848,1.183,10001.0,1.223,3.04,1.183,10001.0,1.223,3.04,1.183,10001.0,1.222,3.072,1.183,10001.0,1.208,1.6,1.183,10001.0,3.078000,4.480000,3.007000
output_gpu,warpctc,Fused_Mul_RealDiv_split_12940109977917559051_kernel,1001.0,1.372,4.224,1.3430000000000002,1001.0,1.372,4.448,1.3430000000000002,10001.0,1.368,4.288,1.3430000000000002,10001.0,1.368,4.288,1.3430000000000002,10001.0,1.368,4.288,1.343,10001.0,1.368,4.288,1.343,10001.0,3.351000,4.032000,3.295000
output_gpu,warpctc,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.36,1.3430000000000002,1001.0,1.422,3.584,1.4069999999999998,10001.0,1.42,3.552,1.376,10001.0,1.42,3.552,1.376,10001.0,1.421,3.488,1.376,10001.0,1.432,3.52,1.407,10001.0,3.278000,4.064000,3.199000
output_gpu,warpctc,Fused_BroadcastTo_inplace_assign_builder_11237864080789593165_kernel,1001.0,1.224,2.9760000000000004,1.183,1001.0,1.223,1.536,1.1840000000000002,10001.0,1.224,3.072,1.183,10001.0,1.224,3.072,1.183,10001.0,1.223,1.536,1.183,10001.0,1.223,1.696,1.183,10001.0,3.076000,3.840000,3.007000
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_11320170747834243405_kernel,1001.0,1.054,3.68,1.023,1001.0,1.065,5.696000000000001,1.023,10001.0,1.06,8.927,1.023,10001.0,1.06,8.927,1.023,10001.0,1.063,3.744,1.023,10001.0,1.055,1.472,1.023,10001.0,2.827000,3.296000,2.751000
output_gpu,wide_deep,Fused_Cast_Mul_split_7158963791301647549_kernel,1001.0,1.405,4.128,1.376,1001.0,1.415,4.5760000000000005,1.376,10001.0,1.403,4.6080000000000005,1.375,10001.0,1.403,4.6080000000000005,1.375,10001.0,1.403,4.352,1.375,10001.0,1.206,1.536,1.183,10001.0,3.182000,3.712000,3.135000
output_gpu,wide_deep,Fused_Reshape_Mul_ReduceSum_split_4829910890489653637_kernel,1001.0,28.201,32.607,28.032,1001.0,24.311,26.464,24.159,10001.0,8.573,12.224,8.479,10001.0,8.573,12.224,8.479,10001.0,7.652,10.88,7.519,10001.0,6.938,7.904,6.688,10001.0,9.227000,10.624000,9.087000
output_gpu,wide_deep,Fused_ReduceSum_split_10403650091473542781_kernel,1001.0,3.749,5.9510000000000005,3.68,1001.0,3.747,4.48,3.68,10001.0,2.296,5.856,2.271,10001.0,2.296,5.856,2.271,10001.0,2.297,5.888,2.271,10001.0,2.298,3.616,2.271,10001.0,4.052000,5.440000,3.999000
output_gpu,wide_deep,Fused_Cast_Add_Cast_split_8170337939879956810_kernel,1001.0,1.317,4.096,1.28,1001.0,1.318,2.08,1.28,10001.0,1.315,4.48,1.279,10001.0,1.315,4.48,1.279,10001.0,1.315,4.224,1.279,10001.0,1.32,9.664,1.279,10001.0,3.316000,4.000000,3.232000
output_gpu,wide_deep,Fused_Cast_Add_Greater_Cast_Mul_split_7909609563230723991_kernel,1001.0,31.348000000000003,34.879,30.911,1001.0,27.162,29.343000000000004,26.303,10001.0,26.534,31.904000000000003,25.28,10001.0,26.534,31.904000000000003,25.28,10001.0,26.905,32.031,25.375,10001.0,25.696,28.896,24.832,10001.0,28.819000000000003,31.808000000000003,28.0
output_gpu,wide_deep,Fused_Cast_Add_Greater_Cast_Mul_split_17465771081355149117_kernel,1001.0,60.349,64.319,59.839,1001.0,52.367,54.752,51.743,10001.0,50.668,57.471,48.575,10001.0,50.668,57.471,48.575,10001.0,50.659000000000006,56.479,48.703,10001.0,50.533,54.111,48.671,10001.0,53.894999999999996,58.495,52.607
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_8261444986825493822_kernel,1001.0,1.262,3.744,1.247,1001.0,1.26,1.408,1.247,10001.0,1.26,4.064,1.247,10001.0,1.26,4.064,1.247,10001.0,1.26,3.808,1.247,10001.0,1.26,1.44,1.247,10001.0,3.098000,3.616000,3.039000
output_gpu,wide_deep,Fused_Reshape_Mul_split_11259780438254894762_kernel,1001.0,1.371,3.4560000000000004,1.3430000000000002,1001.0,1.176,1.696,1.151,10001.0,1.369,3.519,1.3430000000000002,10001.0,1.369,3.519,1.3430000000000002,10001.0,1.369,5.984,1.343,10001.0,1.175,1.696,1.151,10001.0,3.018000,3.744000,2.943000
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_3960045433526664861_kernel,1001.0,1.054,3.648,1.023,1001.0,1.057,1.408,1.023,10001.0,1.057,3.968,1.023,10001.0,1.057,3.968,1.023,10001.0,1.06,3.744,1.023,10001.0,1.063,3.616,1.023,10001.0,2.801000,3.200000,2.720000
output_gpu,wide_deep,Fused_Mul_ReduceSum_split_5786251060074629964_kernel,1001.0,28.202,33.216,28.031,1001.0,28.211,33.824,28.031,10001.0,8.565,11.648,8.479,10001.0,8.565,11.648,8.479,10001.0,7.643,10.976,7.519,10001.0,6.932,9.376,6.688,10001.0,9.989000,11.680000,8.384000
output_gpu,wide_deep,Fused_Cast_Add_split_5058870544143620295_kernel,1001.0,31.15,34.816,30.719,1001.0,26.848000000000003,31.647,26.08,10001.0,26.399,31.296,25.023000000000003,10001.0,26.399,31.296,25.023000000000003,10001.0,25.887,31.392000000000003,24.735,10001.0,26.219,29.056,24.864,10001.0,29.381,32.352,27.807
output_gpu,wide_deep,Fused_BroadcastTo_Mul_split_4422170676585195038_kernel,1001.0,5.226,8.192,5.056,1001.0,5.2410000000000005,8.704,5.056,10001.0,5.215,8.415000000000001,5.023,10001.0,5.215,8.415000000000001,5.023,10001.0,5.212,8.224,5.023,10001.0,5.219,7.808,4.992,10001.0,6.831000,8.160000,6.655000
output_gpu,wide_deep,Fused_Cast_Mul_split_15556904825511396780_kernel,1001.0,27.407,32.863,26.943,1001.0,27.417,30.879,26.975,10001.0,27.265,33.055,26.559,10001.0,27.265,33.055,26.559,10001.0,27.315,32.896,26.591,10001.0,25.599,29.792,24.672,10001.0,29.503,33.312000000000005,28.895
output_gpu,wide_deep,Fused_Cast_split_10247996783945338004_kernel,1001.0,2.119,5.024,2.08,1001.0,2.122,2.656,2.08,10001.0,2.113,5.44,2.079,10001.0,2.113,5.44,2.079,10001.0,2.113,5.056,2.079,10001.0,1.817,2.24,1.791,10001.0,3.885000,4.608000,3.807000
output_gpu,wide_deep,Fused_Cast_ReduceSum_split_15424511383944843442_kernel,1001.0,318.904,344.156,290.141,1001.0,317.406,1171.761,281.308,10001.0,39.513000000000005,50.303,38.752,10001.0,39.513000000000005,50.303,38.752,10001.0,39.256,50.463,38.656,10001.0,39.256,47.999,38.687,10001.0,41.382000000000005,50.239,40.896000
output_gpu,wide_deep,Fused_Add_Cast_Add_Cast_Add_Neg_Exp_Add_RealDiv_split_9392670721290642311_kernel,1001.0,1.495,4.544,1.471,1001.0,1.499,4.448,1.471,10001.0,1.492,4.864,1.471,10001.0,1.492,4.864,1.471,10001.0,1.492,4.448,1.471,10001.0,1.492,1.984,1.471,10001.0,3.264000,4.096000,3.199000
output_gpu,wide_deep,Fused_Cast_split_8201333723459195024_kernel,1001.0,3.645,6.528,3.584,1001.0,3.653,4.159,3.584,10001.0,3.634,6.88,3.552,10001.0,3.634,6.88,3.552,10001.0,3.635,6.496,3.583,10001.0,4.674,9.248,4.416,10001.0,5.362000,5.985000,5.279000
output_gpu,wide_deep,Fused_Reshape_Cast_Mul_split_14754478895751609020_kernel,1001.0,423.078,437.562,417.627,1001.0,397.875,405.307,396.699,10001.0,397.631,405.24,396.536,10001.0,397.631,405.24,396.536,10001.0,397.634,405.305,396.537,10001.0,398.045,1093.616,394.746,10001.0,400.55,406.17199999999997,399.068
output_gpu,wide_deep,Fused_Mul_split_12850699330263623486_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.371,2.9760000000000004,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.368,3.424,1.343,10001.0,1.369,1.92,1.343,10001.0,3.034000,3.712000,2.975000
output_gpu,wide_deep,Fused_Cast_Add_Greater_Cast_Mul_split_7266988415084394553_kernel,1001.0,16.293,20.8,15.903,1001.0,14.466,17.087,14.079,10001.0,14.442,19.84,13.983,10001.0,14.442,19.84,13.983,10001.0,14.446,19.424,13.984,10001.0,13.538,16.639,13.056,10001.0,16.554000,18.912000,15.647000
output_gpu,wide_deep,Fused_Mul_split_15128307268254343677_kernel,1001.0,1.232,3.936,1.215,1001.0,1.233,1.856,1.215,10001.0,1.231,9.536,1.215,10001.0,1.231,9.536,1.215,10001.0,1.23,4.0,1.215,10001.0,1.234,2.719,1.215,10001.0,2.968000,3.488000,2.911000
output_gpu,wide_deep,Fused_Cast_split_16126699656595961669_kernel,1001.0,1.421,4.16,1.4069999999999998,1001.0,1.4280000000000002,1.856,1.4069999999999998,10001.0,1.42,4.5760000000000005,1.376,10001.0,1.42,4.5760000000000005,1.376,10001.0,1.419,4.384,1.376,10001.0,1.22,1.568,1.183,10001.0,3.186000,3.712000,3.135000
output_gpu,wide_deep,Fused_Mul_split_3672699994263214826_kernel,1001.0,1.238,3.936,1.215,1001.0,1.24,4.32,1.215,10001.0,1.235,4.0,1.215,10001.0,1.235,4.0,1.215,10001.0,1.235,4.0,1.215,10001.0,1.234,1.6,1.215,10001.0,2.980000,3.679000,2.911000
output_gpu,wide_deep,Fused_Cast_Add_split_6834618630944280596_kernel,1001.0,59.902,64.031,59.327,1001.0,51.511,56.287,50.879,10001.0,49.635,56.255,47.903,10001.0,49.635,56.255,47.903,10001.0,49.737,55.903,48.031000000000006,10001.0,48.948,51.775,48.159,10001.0,53.131,55.391000000000005,51.998999999999995
output_gpu,wide_deep,Fused_Cast_Mul_split_7051392259342517497_kernel,1001.0,1.436,4.032,1.4069999999999998,1001.0,1.4340000000000002,1.664,1.4069999999999998,10001.0,1.433,4.288,1.4069999999999998,10001.0,1.433,4.288,1.4069999999999998,10001.0,1.433,4.032,1.407,10001.0,1.23,1.472,1.215,10001.0,3.057000,3.488000,3.007000
output_gpu,wide_deep,Fused_Mul_split_7731327840381384245_kernel,1001.0,1.226,3.936,1.1840000000000002,1001.0,1.237,8.192,1.1840000000000002,10001.0,1.224,4.0,1.183,10001.0,1.224,4.0,1.183,10001.0,1.225,4.032,1.183,10001.0,1.223,1.568,1.183,10001.0,2.966000,3.520000,2.911000
output_gpu,wide_deep,Fused_Cast_ReduceSum_split_18187264574057599824_kernel,1001.0,163.611,197.79,148.862,1001.0,149.734,851.029,147.934,10001.0,21.338,32.223,20.448,10001.0,21.338,32.223,20.448,10001.0,20.871,32.254999999999995,20.383,10001.0,20.675,28.928,20.192,10001.0,22.993000,31.744,21.824000
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_15920035459442552540_kernel,1001.0,1.223,2.9760000000000004,1.183,1001.0,1.222,1.536,1.1840000000000002,10001.0,1.225,3.104,1.183,10001.0,1.225,3.104,1.183,10001.0,1.225,9.119,1.183,10001.0,1.207,1.504,1.183,10001.0,2.869000,3.391000,2.815000
output_gpu,wide_deep,Fused_Cast_split_9939745215965046806_kernel,1001.0,26.844,32.192,26.304,1001.0,26.861,30.624,26.431,10001.0,26.422,32.096,25.536,10001.0,26.422,32.096,25.536,10001.0,26.442,32.575,25.408,10001.0,28.707,32.128,27.103,10001.0,28.869,32.064,28.16
output_gpu,wide_deep,Fused_Mul_fusion_9061280092631476395_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.375,4.32,1.3430000000000002,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.4560000000000004,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.361,1.984,1.312,10001.0,3.029000,3.743000,2.975000
output_gpu,wide_deep,Fused_Reshape_Mul_Cast_split_9289092669065748118_kernel,1001.0,451.051,466.394,441.786,1001.0,418.351,422.746,410.299,10001.0,401.352,414.104,398.84,10001.0,401.352,414.104,398.84,10001.0,401.173,416.344,398.68,10001.0,401.339,1133.968,398.938,10001.0,406.208,410.204,399.612
output_gpu,wide_deep,Fused_Cast_Add_split_17520633874024788448_kernel,1001.0,16.169,20.8,15.775,1001.0,14.224,19.776,13.824000000000002,10001.0,13.875,19.615,13.12,10001.0,13.875,19.615,13.12,10001.0,13.885,19.583,13.056,10001.0,13.4,16.575,12.927,10001.0,16.267000,19.104000,15.487000
output_gpu,wide_deep,Fused_BroadcastTo_Mul_split_275640177781852815_kernel,1001.0,1.107,3.84,1.087,1001.0,1.112,4.16,1.087,10001.0,1.108,4.192,1.087,10001.0,1.108,4.192,1.087,10001.0,1.108,4.0,1.087,10001.0,1.106,1.44,1.087,10001.0,2.899000,3.456000,2.815000
output_gpu,wide_deep,Fused_Mul_RealDiv_Reshape_Tile_Mul_Mul_Add_Mul_fusion_857025986291816991_kernel,1001.0,236.161,241.308,235.453,1001.0,237.01,1008.435,235.421,10001.0,236.547,241.179,235.483,10001.0,236.547,241.179,235.483,10001.0,236.539,241.115,235.484,10001.0,236.813,890.067,235.069,10001.0,237.978,241.309,237.31
output_gpu,wide_deep,Fused_Cast_ReduceSum_split_14289495857752519911_kernel,1001.0,51.047,53.952,51.007,1001.0,51.279,54.944,51.199,10001.0,5.672000000000001,9.504,5.088,10001.0,5.672000000000001,9.504,5.088,10001.0,4.848,9.408,4.736,10001.0,4.802,7.136,4.703,10001.0,6.800000,8.032000,6.495000
output_gpu,wide_deep,Fused_ReduceSum_split_588956880411653459_kernel,1001.0,3.749,5.952000000000001,3.68,1001.0,3.748,4.48,3.679,10001.0,2.155,5.44,2.112,10001.0,2.155,5.44,2.112,10001.0,2.155,5.312,2.111,10001.0,2.155,2.88,2.111,10001.0,4.162000,5.312000,4.095000
output_gpu,wide_deep,Fused_Cast_split_9353551539531787746_kernel,1001.0,1.436,4.0,1.4069999999999998,1001.0,1.4340000000000002,1.696,1.4069999999999998,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.4340000000000002,4.064,1.4069999999999998,10001.0,1.434,4.096,1.407,10001.0,1.437,2.24,1.407,10001.0,3.059000,3.648000,3.007000
output_gpu,wide_deep,Fused_Mul_fusion_548583029185277258_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.372,1.888,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.456,1.343,10001.0,1.175,1.696,1.151,10001.0,3.028000,3.711000,2.975000
output_gpu,wide_deep,Fused_Cast_Add_split_16080452064302553389_kernel,1001.0,119.362,125.055,117.406,1001.0,98.999,104.862,98.143,10001.0,94.889,103.518,93.311,10001.0,94.889,103.518,93.311,10001.0,95.247,103.294,93.278,10001.0,110.908,114.59,108.99,10001.0,99.863,102.047,98.751
output_gpu,wide_deep,Fused_Cast_ReduceSum_split_4345341268849637758_kernel,1001.0,100.077,103.55,99.871,1001.0,100.337,103.903,100.03,10001.0,13.686,20.288,12.703,10001.0,13.686,20.288,12.703,10001.0,12.317,20.031,11.36,10001.0,11.782,16.927999999999997,11.072,10001.0,14.956000,19.52,14.080000
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_5515132648152527640_kernel,1001.0,1.054,4.096,1.023,1001.0,1.055,1.344,1.023,10001.0,1.06,8.832,1.023,10001.0,1.06,8.832,1.023,10001.0,1.056,3.712,1.023,10001.0,1.054,2.144,1.023,10001.0,2.819000,3.232000,2.720000
output_gpu,wide_deep,Fused_Cast_Add_Greater_Cast_Mul_split_5351850128657482433_kernel,1001.0,121.361,125.439,120.415,1001.0,100.217,102.303,99.166,10001.0,96.337,104.926,93.822,10001.0,96.337,104.926,93.822,10001.0,95.783,104.638,93.855,10001.0,110.659,114.303,108.895,10001.0,100.461,103.135,99.423
output_gpu,wide_deep,Fused_Mul_Mul_Add_split_8089402150854391345_kernel,1001.0,1.555,3.68,1.535,1001.0,1.5590000000000002,8.416,1.535,10001.0,1.5519999999999998,3.776,1.535,10001.0,1.5519999999999998,3.776,1.535,10001.0,1.552,3.744,1.535,10001.0,1.332,1.984,1.311,10001.0,3.184000,4.032000,3.135000
output_gpu,wide_deep,Fused_Mul_split_18423090061797319888_kernel,1001.0,1.43,4.064,1.4069999999999998,1001.0,1.43,4.064,1.4069999999999998,10001.0,1.4280000000000002,8.447000000000001,1.4069999999999998,10001.0,1.4280000000000002,8.447000000000001,1.4069999999999998,10001.0,1.427,4.064,1.407,10001.0,1.428,1.664,1.407,10001.0,3.069000,3.520000,3.007000
output_gpu,wide_deep,Fused_Cast_Mul_split_9055599974624446663_kernel,1001.0,2.076,4.896,2.047,1001.0,2.088,5.28,2.047,10001.0,2.071,5.184,2.047,10001.0,2.071,5.184,2.047,10001.0,2.072,5.056,2.047,10001.0,1.784,2.208,1.759,10001.0,3.840000,4.608000,3.775000
output_gpu,wide_deep,Fused_Mul_fusion_8193310520161582410_kernel,1001.0,2.371,5.44,2.272,1001.0,2.369,3.008,2.303,10001.0,2.371,5.632000000000001,2.272,10001.0,2.371,5.632000000000001,2.272,10001.0,2.372,5.536,2.271,10001.0,2.067,8.704,2.016,10001.0,4.336000,4.928000,4.160000
output_gpu,wide_deep,Fused_BroadcastTo_inplace_assign_builder_10812234467967346125_kernel,1001.0,1.107,3.744,1.087,1001.0,1.111,1.472,1.087,10001.0,1.105,4.288,1.087,10001.0,1.105,4.288,1.087,10001.0,1.103,4.0,1.087,10001.0,1.104,1.504,1.087,10001.0,2.895000,3.872000,2.815000
output_gpu,wide_deep,Fused_Cast_Mul_split_10405421054794923061_kernel,1001.0,3.562,6.528,3.4560000000000004,1001.0,3.575,4.352,3.52,10001.0,3.5610000000000004,9.12,3.487,10001.0,3.5610000000000004,9.12,3.487,10001.0,3.56,6.624,3.487,10001.0,2.488,2.944,2.335,10001.0,5.299000,5.888000,5.184000
output_gpu,ocean_model,Fused_Add_Mul_Add_Mul_Mul_Mul_Mul_Mul_Mul_Add_Mul_Mul_Mul_Mul_Mul_Mul_Mul_Add_Mu_more_split_4550230658273292643_kernel,1001.0,4.237,7.296,4.064,1001.0,4.24,4.864,4.064,10001.0,4.231,7.648,4.063,10001.0,4.231,7.648,4.063,10001.0,4.235,7.584,4.095,10001.0,4.251,5.248,4.063,10001.0,6.385000,7.552000,6.207000
output_gpu,ocean_model,Fused_Add_Mul_split_280029639252862309_kernel,1001.0,1.58,4.416,1.536,1001.0,1.351,1.76,1.3119999999999998,10001.0,1.351,4.288,1.311,10001.0,1.351,4.288,1.311,10001.0,1.35,4.16,1.311,10001.0,1.346,1.952,1.311,10001.0,3.377000,4.032000,3.295000
output_gpu,ocean_model,Fused_Neg_Mul_Mul_Mul_Add_Mul_Add_Mul_Mul_Sub_Sub_Mul_Add_Mul_RealDiv_Sub_Mul_M_more_fusion_631695052049467379_kernel,1001.0,2.697,6.207999999999999,2.655,1001.0,2.693,3.776,2.655,10001.0,2.689,6.176,2.655,10001.0,2.689,6.176,2.655,10001.0,2.689,6.368,2.655,10001.0,2.607,3.712,2.559,10001.0,4.412000,5.664000,4.351000
output_gpu,ocean_model,Fused_Mul_Sub_Mul_Add_Sub_Add_RealDiv_Mul_Mul_Add_Mul_Mul_Add_Sub_Mul_Mul_Add_Mu_more_split_16326187381967217519_kernel,1001.0,3.813,7.136,3.711,1001.0,3.814,5.024,3.712,10001.0,3.809,7.199,3.712,10001.0,3.809,7.199,3.712,10001.0,3.808,7.168,3.711,10001.0,3.82,4.863,3.712,10001.0,5.540000,6.432000,5.408000
output_gpu,ocean_model,Fused_Sub_Mul_Add_Mul_split_16938530135943794796_kernel,1001.0,2.124,5.28,2.079,1001.0,2.122,2.752,2.079,10001.0,2.1180000000000003,5.12,2.079,10001.0,2.1180000000000003,5.12,2.079,10001.0,2.119,5.088,2.079,10001.0,2.105,2.623,2.079,10001.0,3.902000,4.704000,3.839000
output_gpu,ocean_model,Fused_Neg_Mul_Mul_Add_Mul_Add_Mul_Mul_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mu_more_split_6740904971649010677_kernel,1001.0,5.571000000000001,9.696,5.247999999999999,1001.0,5.499,7.167999999999999,5.216,10001.0,5.558,9.76,5.152,10001.0,5.558,9.76,5.152,10001.0,5.555,10.175,5.216,10001.0,5.572,10.016,5.152,10001.0,8.060000,9.632000,6.848000
output_gpu,ocean_model,Fused_Mul_Mul_RealDiv_Mul_RealDiv_RealDiv_Mul_RealDiv_Add_RealDiv_RealDiv_Add_Mu_more_split_5841614750467548816_kernel,1001.0,4.011,7.68,3.904,1001.0,4.008,4.864,3.903,10001.0,4.005,8.031,3.872,10001.0,4.005,8.031,3.872,10001.0,4.004,7.968,3.872,10001.0,4.016,5.408,3.904,10001.0,6.023000,7.136000,5.887000
output_gpu,ocean_model,Fused_RealDiv_RealDiv_Add_Mul_Add_Mul_Add_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mu_more_split_18430001552349775653_kernel,1001.0,3.815,8.48,3.775,1001.0,3.637,5.824,3.615,10001.0,3.632,8.288,3.615,10001.0,3.632,8.288,3.615,10001.0,3.632,8.096,3.584,10001.0,3.623,5.664,3.583,10001.0,5.246000,6.944000,5.183000
output_gpu,ocean_model,Fused_Mul_Mul_Add_split_2071922567707676054_kernel,1001.0,2.132,5.6,2.08,1001.0,2.129,2.8160000000000003,2.111,10001.0,2.127,5.088,2.079,10001.0,2.127,5.088,2.079,10001.0,2.126,5.184,2.079,10001.0,2.103,2.56,2.079,10001.0,3.884000,4.448000,3.807000
output_gpu,ocean_model,Fused_Mul_Mul_Sub_Mul_RealDiv_Mul_Mul_Mul_split_13641222270842381547_kernel,1001.0,2.52,6.112,2.495,1001.0,2.515,3.103,2.495,10001.0,2.511,6.112,2.463,10001.0,2.511,6.112,2.463,10001.0,2.511,6.176,2.463,10001.0,2.502,3.647,2.432,10001.0,4.604000,5.887000,4.543000
output_gpu,ocean_model,Fused_Mul_Mul_RealDiv_RealDiv_Add_RealDiv_Sub_Mul_Sub_RealDiv_Mul_Sub_Mul_Add_Ad_more_split_15745108444977328389_kernel,1001.0,5.808,9.536,5.6,1001.0,5.79,7.552,5.5360000000000005,10001.0,5.8020000000000005,10.24,5.5360000000000005,10001.0,5.8020000000000005,10.24,5.5360000000000005,10001.0,5.801,9.696,5.504,10001.0,5.772,7.008,5.536,10001.0,7.341000,8.895000,7.200000
output_gpu,ocean_model,Fused_Mul_Mul_Mul_split_9200335753560740875_kernel,1001.0,2.329,5.5360000000000005,2.303,1001.0,2.326,3.232,2.303,10001.0,2.327,5.695,2.303,10001.0,2.327,5.695,2.303,10001.0,2.325,5.856,2.303,10001.0,2.307,3.232,2.271,10001.0,4.326000,5.312000,4.255000
output_gpu,ocean_model,Fused_Mul_split_11899235680786148265_kernel,1001.0,2.016,4.832,1.983,1001.0,2.015,2.464,1.983,10001.0,2.013,4.928,1.983,10001.0,2.013,4.928,1.983,10001.0,2.013,4.927,1.983,10001.0,1.985,2.432,1.951,10001.0,3.780000,4.480000,3.743000
output_gpu,ocean_model,Fused_Add_Mul_split_9691463965559746728_kernel,1001.0,1.58,4.384,1.567,1001.0,1.351,1.824,1.3119999999999998,10001.0,1.351,4.224,1.3119999999999998,10001.0,1.351,4.224,1.3119999999999998,10001.0,1.351,4.288,1.311,10001.0,1.351,1.824,1.312,10001.0,3.371000,4.032000,3.295000
output_gpu,ocean_model,Fused_Mul_Mul_Sub_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mul_Add_Add_Mul_Add_Add_Ad_more_split_18009587845282665190_kernel,1001.0,3.085,7.072,3.04,1001.0,2.873,4.704,2.847,10001.0,2.869,6.848,2.847,10001.0,2.869,6.848,2.847,10001.0,2.87,6.976,2.847,10001.0,2.868,4.352,2.847,10001.0,4.523000,5.952000,4.447000
output_gpu,ocean_model,Fused_Mul_RealDiv_RealDiv_Add_Mul_Add_Add_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mu_more_split_3614365085361721488_kernel,1001.0,3.256,7.423999999999999,3.2310000000000003,1001.0,3.023,4.384,3.007,10001.0,3.02,7.136,3.007,10001.0,3.02,7.136,3.007,10001.0,3.02,7.04,3.007,10001.0,3.019,4.352,2.975,10001.0,4.738000,6.240000,4.671000
output_gpu,ocean_model,Fused_Mul_split_17639355236687109424_kernel,1001.0,1.556,4.32,1.535,1001.0,1.33,1.76,1.311,10001.0,1.33,4.192,1.311,10001.0,1.33,4.192,1.311,10001.0,1.331,4.16,1.311,10001.0,1.325,1.792,1.311,10001.0,3.117000,3.680000,3.071000
output_gpu,ocean_model,Fused_Add_Mul_Mul_split_4258656416256596482_kernel,1001.0,1.922,4.896,1.887,1001.0,1.7009999999999998,2.624,1.664,10001.0,1.7009999999999998,5.024,1.663,10001.0,1.7009999999999998,5.024,1.663,10001.0,1.701,4.928,1.663,10001.0,1.696,2.496,1.663,10001.0,3.740000,5.279000,3.679000
output_gpu,ocean_model,Fused_Mul_Mul_Sub_Mul_RealDiv_Add_Add_Mul_Sub_RealDiv_Mul_Add_split_11177634212938144189_kernel,1001.0,3.157,6.752000000000001,3.104,1001.0,3.1630000000000003,4.0,3.135,10001.0,3.154,6.816,3.104,10001.0,3.154,6.816,3.104,10001.0,3.15,6.848,3.103,10001.0,3.109,6.688,3.071,10001.0,5.209000,6.367000,5.121000
output_gpu,ocean_model,Fused_Mul_Sub_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mul_Add_Add_Mul_Add_Add_Add_Mu_more_split_13935287016784114518_kernel,1001.0,3.233,7.487999999999999,3.199,1001.0,3.007,4.736000000000001,2.975,10001.0,3.003,6.944,2.975,10001.0,3.003,6.944,2.975,10001.0,3.003,7.135,2.975,10001.0,3.002,4.576,2.975,10001.0,4.683000,6.112000,4.607000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Mul_Sub_Mul_Mul_Add_Mul_Mul_Sub_Mul_Mul_Add_Mul_Sub_Mul_A_more_fusion_17407908202416743929_kernel,1001.0,3.69,7.391,3.616,1001.0,3.849,10.976,3.584,10001.0,3.682,7.648,3.583,10001.0,3.682,7.648,3.583,10001.0,3.681,7.743,3.583,10001.0,3.37,5.056,3.232,10001.0,5.877000,7.392000,5.728000
output_gpu,ocean_model,Fused_Mul_Mul_Mul_split_4463844577328628603_kernel,1001.0,1.753,4.768,1.727,1001.0,1.5319999999999998,2.143,1.503,10001.0,1.531,4.64,1.503,10001.0,1.531,4.64,1.503,10001.0,1.531,4.704,1.503,10001.0,1.525,2.24,1.503,10001.0,3.506000,4.576000,3.424000
output_gpu,ocean_model,Fused_Mul_Mul_Add_fusion_8662538868837885053_kernel,1001.0,2.13,5.184,2.079,1001.0,2.124,2.688,2.08,10001.0,2.123,9.568,2.079,10001.0,2.123,9.568,2.079,10001.0,2.122,5.6,2.079,10001.0,2.096,2.656,2.048,10001.0,3.888000,4.511000,3.807000
output_gpu,ocean_model,Fused_Mul_ReduceSum_split_590200782861150805_kernel,1001.0,6.723,9.888,6.687,1001.0,6.72,7.552,6.687,10001.0,1.978,5.184,1.951,10001.0,1.978,5.184,1.951,10001.0,1.94,5.248,1.919,10001.0,1.921,5.152,1.887,10001.0,3.659000,4.543000,3.615000
output_gpu,ocean_model,Fused_Mul_Add_Sub_Mul_RealDiv_RealDiv_Mul_Add_Mul_Mul_Add_RealDiv_Sub_Mul_Sub_Re_more_split_4491669579815926202_kernel,1001.0,5.707000000000001,9.888,5.472,1001.0,5.688,7.52,5.44,10001.0,5.672000000000001,9.983,5.44,10001.0,5.672000000000001,9.983,5.44,10001.0,5.668,9.824,5.44,10001.0,5.637,7.296,5.408,10001.0,7.791000,9.248000,7.615000
output_gpu,ocean_model,Fused_Mul_Mul_Mul_Mul_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Ad_more_split_4371667754549325173_kernel,1001.0,2.838,7.36,2.815,1001.0,2.617,3.968,2.591,10001.0,2.614,6.5920000000000005,2.591,10001.0,2.614,6.5920000000000005,2.591,10001.0,2.615,6.592,2.591,10001.0,2.615,4.096,2.591,10001.0,4.619000,6.144000,4.544000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mul_Add_Add_Mul_Add_Add_Ad_more_split_497653174069962096_kernel,1001.0,3.0860000000000003,7.232,3.04,1001.0,2.873,4.48,2.847,10001.0,2.87,6.944,2.847,10001.0,2.87,6.944,2.847,10001.0,2.87,6.88,2.847,10001.0,2.608,4.256,2.591,10001.0,4.516000,5.696000,4.447000
output_gpu,ocean_model,Fused_Mul_Add_RealDiv_Mul_Add_Sub_Mul_Add_Add_RealDiv_Mul_Mul_Sub_Add_Mul_Mul_Mu_more_split_11966298842298929878_kernel,1001.0,6.728,11.36,6.303999999999999,1001.0,6.7410000000000005,11.776,6.272,10001.0,6.718999999999999,11.328,6.144,10001.0,6.718999999999999,11.328,6.144,10001.0,6.734,11.36,6.144,10001.0,6.689,8.96,6.176,10001.0,8.958000,12.448000,8.320000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Mul_Sub_Add_Mul_Add_Add_Add_split_15261566472710478078_kernel,1001.0,2.068,5.6,2.047,1001.0,1.843,3.008,1.823,10001.0,1.841,5.12,1.823,10001.0,1.841,5.12,1.823,10001.0,1.841,5.376,1.823,10001.0,1.678,2.848,1.632,10001.0,3.597000,4.416000,3.520000
output_gpu,ocean_model,Fused_Mul_Mul_Add_RealDiv_Sqrt_Mul_Mul_Mul_Add_Sub_Mul_Add_Mul_Mul_Sub_Mul_Mul__more_fusion_4592331833935969253_kernel,1001.0,4.843999999999999,8.576,4.703,1001.0,5.306,7.456,5.247999999999999,10001.0,5.303,9.952,5.247000000000001,10001.0,5.303,9.952,5.247000000000001,10001.0,5.306,9.823,5.247,10001.0,4.804,6.816,4.767,10001.0,7.311000,9.248000,7.231000
output_gpu,ocean_model,Fused_Mul_Sub_Mul_Add_Sub_Add_RealDiv_Mul_Mul_Add_Mul_Mul_Add_Sub_Mul_Mul_Add_split_16233265103804322472_kernel,1001.0,3.217,6.688,3.104,1001.0,3.218,4.256,3.136,10001.0,3.21,6.816,3.104,10001.0,3.21,6.816,3.104,10001.0,3.221,6.944,3.104,10001.0,3.22,4.288,3.135,10001.0,4.952000,6.176000,4.800000
output_gpu,ocean_model,Fused_Mul_Mul_Mul_Mul_ReduceSum_split_14684688370529604183_kernel,1001.0,7.965,12.192,7.904,1001.0,7.957000000000001,9.28,7.903,10001.0,6.121,9.023,5.92,10001.0,6.121,9.023,5.92,10001.0,5.319,8.32,5.184,10001.0,5.309,9.504,5.183,10001.0,7.527000,8.352000,7.296000
output_gpu,ocean_model,Fused_Sub_Mul_Add_Sub_Add_RealDiv_Mul_Mul_Add_Mul_Mul_Add_Sub_Mul_Mul_Add_split_11499417400706344794_kernel,1001.0,3.2260000000000004,6.784,3.136,1001.0,3.233,4.192,3.167,10001.0,3.2230000000000003,6.976,3.104,10001.0,3.2230000000000003,6.976,3.104,10001.0,3.225,6.848,3.135,10001.0,3.235,6.944,3.167,10001.0,4.958000,5.888000,4.832000
output_gpu,ocean_model,Fused_Add_fusion_11541242400929822124_kernel,1001.0,1.57,4.416,1.536,1001.0,1.339,2.048,1.311,10001.0,1.339,4.352,1.311,10001.0,1.339,4.352,1.311,10001.0,1.339,4.224,1.311,10001.0,1.333,2.016,1.311,10001.0,3.358000,3.967000,3.295000
output_gpu,ocean_model,Fused_Mul_RealDiv_RealDiv_Add_RealDiv_Sub_Mul_Sub_RealDiv_Mul_Mul_Sub_Add_Mul_Mu_more_split_3863478738793411496_kernel,1001.0,13.645,18.047,12.96,1001.0,13.62,17.375999999999998,12.832,10001.0,13.614,17.76,12.704,10001.0,13.614,17.76,12.704,10001.0,13.574,17.536,12.512,10001.0,13.631,15.2,12.736,10001.0,15.841000,17.664000,14.496000
output_gpu,ocean_model,Fused_Mul_Mul_Add_split_14954380063950956193_kernel,1001.0,1.998,5.152,1.952,1001.0,1.996,2.688,1.952,10001.0,1.995,5.056,1.951,10001.0,1.995,5.056,1.951,10001.0,1.994,5.12,1.951,10001.0,1.983,5.12,1.951,10001.0,3.749000,4.672000,3.679000
output_gpu,ocean_model,Fused_Sub_Add_Mul_Add_fusion_15980014615199773479_kernel,1001.0,1.654,4.48,1.631,1001.0,1.413,1.856,1.376,10001.0,1.413,4.288,1.375,10001.0,1.413,4.288,1.375,10001.0,1.413,4.287,1.376,10001.0,1.409,2.24,1.375,10001.0,3.186000,4.000000,3.135000
output_gpu,ocean_model,Fused_Mul_Add_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mul_Add_Add_Mul_Add_Add_Add_Mu_more_split_2161528760686140622_kernel,1001.0,3.234,7.838999999999999,3.199,1001.0,3.006,4.927,2.975,10001.0,3.003,7.296,2.975,10001.0,3.003,7.296,2.975,10001.0,3.003,7.232,2.975,10001.0,3.003,4.704,2.975,10001.0,5.019000,6.432000,4.959000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mul_Mu_more_split_17873142020478805571_kernel,1001.0,12.726,13.983,11.104,1001.0,12.661,13.536,10.879,10001.0,12.642,14.527,10.464,10001.0,12.642,14.527,10.464,10001.0,12.655,14.271,10.912,10001.0,12.615,13.664,10.495,10001.0,14.397000,15.552000,11.680000
output_gpu,ocean_model,Fused_Sub_Mul_Add_Add_RealDiv_Mul_Mul_Sub_Add_Mul_Mul_Mul_Add_Mul_Add_Add_Mul_Mu_more_split_4381815675088911499_kernel,1001.0,5.332999999999999,9.056,5.12,1001.0,5.325,6.528,5.12,10001.0,5.295,9.216,5.056,10001.0,5.295,9.216,5.056,10001.0,5.332,9.311,5.119,10001.0,5.275,6.752,5.056,10001.0,7.482000,8.800000,7.295000
output_gpu,ocean_model,Fused_Mul_split_16704790430360613579_kernel,1001.0,1.569,4.416,1.536,1001.0,1.339,1.7280000000000002,1.311,10001.0,1.339,4.448,1.311,10001.0,1.339,4.448,1.311,10001.0,1.339,4.256,1.311,10001.0,1.334,1.888,1.311,10001.0,3.142000,3.745000,3.071000
output_gpu,ocean_model,Fused_Sub_Mul_Add_Add_RealDiv_Mul_Mul_Sub_Add_Mul_Mul_Mul_Add_Mul_Add_Mul_Mul_Mu_more_split_2814012812532437573_kernel,1001.0,3.276,6.528,3.2,1001.0,3.274,4.16,3.2,10001.0,3.274,9.344,3.199,10001.0,3.274,9.344,3.199,10001.0,3.268,6.688,3.199,10001.0,3.253,6.496,3.168,10001.0,4.993000,6.049000,4.895000
output_gpu,ocean_model,Fused_Mul_Add_Mul_Add_split_12100373439152999075_kernel,1001.0,1.6569999999999998,4.5760000000000005,1.631,1001.0,1.413,1.984,1.376,10001.0,1.412,4.512,1.375,10001.0,1.412,4.512,1.375,10001.0,1.412,4.48,1.375,10001.0,1.412,4.32,1.375,10001.0,3.423000,4.033000,3.359000
output_gpu,ocean_model,Fused_Mul_Add_Mul_Sub_Mul_Add_Mul_Add_Mul_Sub_Mul_Add_Mul_Add_Mul_Mul_Sub_Mul_M_more_fusion_14105847376143261847_kernel,1001.0,4.582,8.128,4.448,1001.0,4.58,5.247999999999999,4.447,10001.0,4.571000000000001,8.32,4.416,10001.0,4.571000000000001,8.32,4.416,10001.0,4.574,8.576,4.416,10001.0,4.587,5.663,4.448,10001.0,6.811000,7.935000,6.624000
output_gpu,ocean_model,Fused_AssignAdd_2227367798448543938_kernel,1001.0,1.372,3.392,1.3430000000000002,1001.0,1.421,1.952,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.552,1.407,10001.0,1.428,3.488,1.407,10001.0,3.276000,4.032000,3.200000
output_gpu,ocean_model,Fused_Abs_Abs_Mul_Mul_Add_Sqrt_Mul_Mul_RealDiv_Greater_Cast_Greater_Cast_Mul_Mul_more_split_6851537396633562808_kernel,1001.0,13.688,17.023,12.832,1001.0,13.682,16.544,12.8,10001.0,13.707,17.375,12.704,10001.0,13.707,17.375,12.704,10001.0,13.663,16.992,12.48,10001.0,13.57,16.64,12.447,10001.0,15.902000,17.824000,14.624000
output_gpu,ocean_model,Fused_Mul_split_2232376661439959905_kernel,1001.0,1.57,4.448,1.535,1001.0,1.34,1.92,1.311,10001.0,1.339,4.64,1.311,10001.0,1.339,4.64,1.311,10001.0,1.34,4.352,1.311,10001.0,1.334,2.176,1.311,10001.0,3.356000,4.064000,3.295000
output_gpu,ocean_model,Fused_Mul_RealDiv_RealDiv_Add_Mul_Add_Sub_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mu_more_split_15897894188897590299_kernel,1001.0,3.2510000000000003,7.456,3.2310000000000003,1001.0,3.022,4.512,3.007,10001.0,3.019,6.976,3.007,10001.0,3.019,6.976,3.007,10001.0,3.019,7.168,3.007,10001.0,3.015,4.32,2.975,10001.0,4.721000,6.336000,4.640000
output_gpu,ocean_model,Fused_Sub_Mul_Mul_RealDiv_Sub_Mul_Add_Add_RealDiv_Mul_Mul_Sub_Add_Mul_Mul_Mul_Ad_more_split_10683122226242959033_kernel,1001.0,3.942,7.999,3.808,1001.0,3.944,5.92,3.84,10001.0,3.935,8.16,3.808,10001.0,3.935,8.16,3.808,10001.0,3.935,8.128,3.776,10001.0,3.93,8.352,3.776,10001.0,5.673000,7.040000,5.503000
output_gpu,ocean_model,Fused_Sub_fusion_7405813685502833142_kernel,1001.0,1.808,4.735,1.76,1001.0,1.806,2.3680000000000003,1.76,10001.0,1.806,4.767,1.759,10001.0,1.806,4.767,1.759,10001.0,1.805,5.12,1.76,10001.0,1.786,4.864,1.759,10001.0,3.588000,4.288000,3.519000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Add_Mul_Add_Mul_Add_Mul_Add_Mul_Add_Mul_Add_Mul_Add_Mul_A_more_fusion_2930263652351074797_kernel,1001.0,2.196,5.728,2.175,1001.0,1.933,3.2,1.919,10001.0,1.932,5.792000000000001,1.888,10001.0,1.932,5.792000000000001,1.888,10001.0,1.932,5.631,1.888,10001.0,1.931,6.016,1.887,10001.0,3.678000,4.704000,3.616000
output_gpu,ocean_model,Fused_Mul_Mul_Sub_Add_Sub_Mul_Add_RealDiv_Mul_Add_split_17052612682942218914_kernel,1001.0,2.715,6.144,2.655,1001.0,2.722,3.743,2.656,10001.0,2.712,6.144,2.655,10001.0,2.712,6.144,2.655,10001.0,2.712,6.272,2.655,10001.0,2.706,3.648,2.655,10001.0,4.426000,5.440000,3.488000
output_gpu,ocean_model,Fused_Add_Mul_Mul_Log_RealDiv_Mul_Greater_Cast_Sub_Mul_Add_Less_Cast_Sub_Mul_Mul_more_split_17573120460584264631_kernel,1001.0,1.783,4.864,1.759,1001.0,1.561,2.144,1.535,10001.0,1.882,5.119,1.855,10001.0,1.882,5.119,1.855,10001.0,1.882,5.216,1.855,10001.0,1.888,2.592,1.855,10001.0,3.864000,4.864000,3.776000
output_gpu,ocean_model,Fused_Sub_Mul_Add_Mul_Sub_fusion_8024827762740561513_kernel,1001.0,1.77,4.832,1.7280000000000002,1001.0,1.548,2.112,1.535,10001.0,1.548,4.5760000000000005,1.535,10001.0,1.548,4.5760000000000005,1.535,10001.0,1.548,4.832,1.535,10001.0,1.548,4.896,1.535,10001.0,3.345000,4.032000,3.265000
output_gpu,ocean_model,Fused_RealDiv_RealDiv_Add_Mul_Add_Mul_Sub_Mul_Mul_RealDiv_RealDiv_RealDiv_Add_Mu_more_split_727490911965010905_kernel,1001.0,3.814,8.384,3.775,1001.0,3.636,5.696000000000001,3.615,10001.0,3.633,8.416,3.583,10001.0,3.633,8.416,3.583,10001.0,3.632,8.223,3.584,10001.0,3.619,5.504,3.583,10001.0,5.239000,7.040000,5.183000
output_gpu,ocean_model,Fused_Mul_Mul_Sub_Sub_Mul_Add_Add_RealDiv_Mul_Mul_Add_Mul_Mul_Mul_Add_split_16794131334612252762_kernel,1001.0,2.929,6.464,2.847,1001.0,2.928,3.775,2.8480000000000003,10001.0,2.926,6.656000000000001,2.847,10001.0,2.926,6.656000000000001,2.847,10001.0,2.925,7.391,2.847,10001.0,2.924,4.32,2.847,10001.0,5.028000,5.984000,4.927000
output_gpu,ocean_model,Fused_Mul_Mul_Add_Mul_Sqrt_Mul_Sub_Mul_Add_split_5221610296727807210_kernel,1001.0,2.3040000000000003,5.28,2.271,1001.0,2.3,2.8480000000000003,2.271,10001.0,2.301,5.28,2.271,10001.0,2.301,5.28,2.271,10001.0,2.299,5.344,2.271,10001.0,2.064,2.624,2.016,10001.0,4.228000,4.895000,4.159000
output_gpu,ocean_model,Fused_NotEqual_fusion_14063688001663488845_kernel,1001.0,1.3730000000000002,3.359,1.3430000000000002,1001.0,1.3730000000000002,1.92,1.3430000000000002,10001.0,1.371,3.424,1.3430000000000002,10001.0,1.371,3.424,1.3430000000000002,10001.0,1.372,3.424,1.343,10001.0,1.331,1.888,1.311,10001.0,3.031000,3.936000,2.945000
output_gpu,ocean_model,Fused_Sub_Add_Mul_RealDiv_Add_Mul_Mul_split_6989475584441231024_kernel,1001.0,2.64,5.952000000000001,2.592,1001.0,2.641,3.616,2.592,10001.0,2.634,6.207999999999999,2.591,10001.0,2.634,6.207999999999999,2.591,10001.0,2.633,6.432,2.591,10001.0,2.633,6.144,2.591,10001.0,4.747000,5.729000,4.671000
output_gpu,ocean_model,Fused_Mul_Mul_Add_fusion_6199310446250699574_kernel,1001.0,2.13,5.056,2.08,1001.0,2.133,5.28,2.08,10001.0,2.122,5.12,2.079,10001.0,2.122,5.12,2.079,10001.0,2.123,5.088,2.08,10001.0,1.912,2.528,1.887,10001.0,3.895000,4.480000,3.807000
output_gpu,ocean_model,Fused_Mul_Neg_Mul_Mul_Add_Mul_Add_Mul_Mul_Mul_Mul_Add_Mul_Mul_Add_Mul_Mul_Add_Mu_more_split_8866495038320554088_kernel,1001.0,5.635,9.472,5.343999999999999,1001.0,5.587000000000001,7.36,5.247999999999999,10001.0,5.627999999999999,9.728,5.28,10001.0,5.627999999999999,9.728,5.28,10001.0,5.625,9.632,5.28,10001.0,5.58,7.007,5.247,10001.0,8.162000,9.760000,7.551000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_split_5090397040456000108_kernel,1001.0,1.495,4.192,1.471,1001.0,1.6980000000000002,4.416,1.664,10001.0,1.695,4.479,1.663,10001.0,1.695,4.479,1.663,10001.0,1.695,4.512,1.663,10001.0,1.695,4.864,1.663,10001.0,3.476000,4.000000,3.423000
output_gpu,transformer_bs8,Fused_Dropout_13957269975033262040_kernel,1001.0,16.121,20.288,15.584,1001.0,16.117,20.352,15.616,10001.0,16.052,20.607,15.072,10001.0,16.052,20.607,15.072,10001.0,16.196,20.576,15.552,10001.0,19.974,26.367,18.912,10001.0,17.950000,20.256000,17.279000
output_gpu,transformer_bs8,Fused_Cast_Reciprocal_Mul_Mul_split_7493177653167057686_kernel,1001.0,1.4269999999999998,4.255,1.4069999999999998,1001.0,1.426,1.92,1.4069999999999998,10001.0,1.425,4.352,1.4069999999999998,10001.0,1.425,4.352,1.4069999999999998,10001.0,1.425,4.384,1.407,10001.0,1.431,4.288,1.407,10001.0,3.119000,3.776000,3.071000
output_gpu,transformer_bs8,Fused_Reshape_Cast_fusion_2450286835023766724_kernel,1001.0,1.7280000000000002,4.544,1.664,1001.0,2.114,4.992,2.079,10001.0,2.067,4.927,2.047,10001.0,2.067,4.927,2.047,10001.0,2.066,4.928,2.016,10001.0,2.072,5.28,2.047,10001.0,4.133000,4.832000,4.064000
output_gpu,transformer_bs8,Fused_Cast_Add_DropoutGrad_fusion_16647297462625763693_kernel,1001.0,2.422,5.312,2.399,1001.0,2.086,2.56,2.048,10001.0,2.418,6.016,2.367,10001.0,2.418,6.016,2.367,10001.0,2.416,5.44,2.367,10001.0,2.084,2.944,2.047,10001.0,4.196000,4.864000,4.127000
output_gpu,transformer_bs8,Fused_Cast_fusion_9402160209689404456_kernel,1001.0,152.533,157.95,151.90200000000002,1001.0,150.718,154.718,150.142,10001.0,150.905,158.30100000000002,149.94899999999998,10001.0,150.905,158.30100000000002,149.94899999999998,10001.0,150.995,158.269,149.981,10001.0,143.34,149.887,141.95,10001.0,154.254,159.999,152.79899999999998
output_gpu,transformer_bs8,Fused_DropoutGrad_17561699232604575970_kernel,1001.0,3.794,6.624,3.743,1001.0,3.797,6.656000000000001,3.743,10001.0,3.79,6.72,3.712,10001.0,3.79,6.72,3.712,10001.0,3.79,6.752,3.743,10001.0,5.68,8.928,5.376,10001.0,5.515000,6.112000,5.439000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_17465018595820529493_kernel,1001.0,3.867,6.816,3.68,1001.0,4.173,4.6080000000000005,4.0,10001.0,4.168,7.392,3.936,10001.0,4.168,7.392,3.936,10001.0,4.169,7.04,3.968,10001.0,4.176,6.656,3.968,10001.0,5.832000,6.304000,5.663000
output_gpu,transformer_bs8,Fused_Cast_Reciprocal_Mul_Mul_split_17798736324821665678_kernel,1001.0,15.283,21.375,14.591,1001.0,15.264,18.848,14.624,10001.0,15.258,21.632,14.399,10001.0,15.258,21.632,14.399,10001.0,15.261,21.952,14.432,10001.0,15.26,19.456,14.368,10001.0,17.639000,21.888000,16.480000
output_gpu,transformer_bs8,Fused_LogicalNot_LogicalAnd_Mul_Select_Assign_fusion_12934296097103418898_kernel,1001.0,1.588,3.712,1.567,1001.0,1.629,3.744,1.599,10001.0,1.6269999999999998,3.84,1.599,10001.0,1.6269999999999998,3.84,1.599,10001.0,1.627,3.872,1.599,10001.0,1.637,3.936,1.599,10001.0,3.481000,4.448000,3.423000
output_gpu,transformer_bs8,Fused_DropoutGrad_Cast_Mul_fusion_2453450888403074732_kernel,1001.0,3.3280000000000003,6.56,3.232,1001.0,3.329,4.128,3.232,10001.0,3.322,9.599,3.2,10001.0,3.322,9.599,3.2,10001.0,3.322,6.719,3.2,10001.0,3.279,7.168,3.199,10001.0,5.088000,5.952000,4.960000
output_gpu,transformer_bs8,Fused_Dropout_13816000070003386799_kernel,1001.0,30.152,35.264,29.567,1001.0,30.176,32.895,29.407,10001.0,29.87,35.26300000000001,28.832,10001.0,29.87,35.26300000000001,28.832,10001.0,30.05,35.26300000000001,29.023,10001.0,35.812,42.047,33.983,10001.0,31.959000,35.007999999999996,31.232000
output_gpu,transformer_bs8,Fused_Add_split_3559605619773630418_kernel,1001.0,3.791,6.5920000000000005,3.743,1001.0,3.796,4.416,3.743,10001.0,3.788,6.656000000000001,3.712,10001.0,3.788,6.656000000000001,3.712,10001.0,3.787,6.72,3.712,10001.0,5.67,6.496,5.408,10001.0,6.013000,6.784000,5.919000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_split_8492589696560582847_kernel,1001.0,1.319,4.064,1.28,1001.0,1.493,4.192,1.471,10001.0,1.49,4.544,1.471,10001.0,1.49,4.544,1.471,10001.0,1.49,4.287,1.471,10001.0,1.489,4.512,1.471,10001.0,3.466000,4.032000,3.391000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_3619668912583962051_kernel,1001.0,3.2960000000000003,6.144,3.232,1001.0,2.612,3.072,2.559,10001.0,2.603,5.567,2.559,10001.0,2.603,5.567,2.559,10001.0,2.602,5.664,2.559,10001.0,2.607,3.104,2.559,10001.0,4.280000,6.752000,4.191000
output_gpu,transformer_bs8,Fused_DropoutGrad_Cast_Mul_fusion_12576306490614610767_kernel,1001.0,2.461,5.343999999999999,2.3680000000000003,1001.0,2.47,5.247999999999999,2.399,10001.0,2.46,5.376,2.3680000000000003,10001.0,2.46,5.376,2.3680000000000003,10001.0,2.461,5.344,2.367,10001.0,2.435,7.424,2.336,10001.0,4.164000,4.800000,4.032000
output_gpu,transformer_bs8,Fused_Cast_fusion_7372772219015080439_kernel,1001.0,38.56,43.903,38.111,1001.0,39.308,722.296,38.208,10001.0,38.154,43.935,36.896,10001.0,38.154,43.935,36.896,10001.0,37.967,43.83900000000001,36.671,10001.0,41.31,46.111,38.976,10001.0,40.105000000000004,43.903,39.423
output_gpu,transformer_bs8,Fused_DropoutGrad_4004227147354677168_kernel,1001.0,2.342,5.28,2.271,1001.0,2.341,5.343999999999999,2.272,10001.0,2.335,5.632000000000001,2.272,10001.0,2.335,5.632000000000001,2.272,10001.0,2.336,5.44,2.271,10001.0,3.508,6.56,3.36,10001.0,4.173000,4.800000,4.064000
output_gpu,transformer_bs8,Fused_BroadcastTo_Mul_Neg_Reshape_BroadcastTo_Mul_split_3119297338359657369_kernel,1001.0,55.183,60.191,54.559,1001.0,52.018,58.399,51.487,10001.0,51.889,57.407,51.167,10001.0,51.889,57.407,51.167,10001.0,51.898,57.43899999999999,51.199,10001.0,51.888,752.344,51.103,10001.0,53.967,58.496,53.279
output_gpu,transformer_bs8,Fused_Sub_Mul_Cast_Mul_fusion_8433708891034318749_kernel,1001.0,1.813,4.704,1.791,1001.0,1.812,4.544,1.791,10001.0,1.81,4.672,1.76,10001.0,1.81,4.672,1.76,10001.0,1.81,4.672,1.76,10001.0,1.813,4.928,1.76,10001.0,3.570000,4.128000,3.488000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_split_9638227642095393005_kernel,1001.0,9.663,13.088,9.216,1001.0,9.306,13.088,8.927999999999999,10001.0,9.298,13.152,8.831999999999997,10001.0,9.298,13.152,8.831999999999997,10001.0,9.295,13.151,8.895,10001.0,9.276,13.566999999999998,8.48,10001.0,11.971000,14.048000,11.168000
output_gpu,transformer_bs8,Fused_Add_split_12083053084646223236_kernel,1001.0,1.686,4.416,1.663,1001.0,1.6980000000000002,4.48,1.663,10001.0,1.6840000000000002,4.6080000000000005,1.663,10001.0,1.6840000000000002,4.6080000000000005,1.663,10001.0,1.684,4.48,1.663,10001.0,1.697,4.576,1.568,10001.0,3.449000,4.032000,3.391000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_fusion_18177558693432335165_kernel,1001.0,1.4980000000000002,4.224,1.471,1001.0,1.693,4.384,1.663,10001.0,1.689,4.512,1.663,10001.0,1.689,4.512,1.663,10001.0,1.689,4.512,1.663,10001.0,1.69,4.736,1.663,10001.0,3.474000,3.968000,3.423000
output_gpu,transformer_bs8,Fused_Cast_split_153224478348915484_kernel,1001.0,1.231,3.936,1.215,1001.0,1.231,3.936,1.215,10001.0,1.23,4.032,1.1840000000000002,10001.0,1.23,4.032,1.1840000000000002,10001.0,1.229,4.032,1.215,10001.0,1.228,4.224,1.215,10001.0,2.957000,3.488000,2.911000
output_gpu,transformer_bs8,Fused_Dropout_Add_Cast_fusion_5488749897085513028_kernel,1001.0,2.008,4.864,1.983,1001.0,2.01,2.464,1.983,10001.0,2.009,5.024,1.983,10001.0,2.009,5.024,1.983,10001.0,2.009,4.959,1.983,10001.0,2.011,5.376,1.983,10001.0,3.711000,4.320000,3.647000
output_gpu,transformer_bs8,Fused_Cast_Add_Reshape_Cast_DropoutGrad_Mul_Reshape_fusion_13391714948919121338_kernel,1001.0,2.073,5.12,1.984,1001.0,2.31,2.8160000000000003,2.272,10001.0,2.28,5.343999999999999,2.239,10001.0,2.28,5.343999999999999,2.239,10001.0,2.279,5.248,2.239,10001.0,2.069,2.592,2.016,10001.0,4.033000,5.024000,3.967000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_fusion_2262339192106526786_kernel,1001.0,2.638,5.535,2.559,1001.0,2.468,5.312,2.399,10001.0,2.463,5.343999999999999,2.367,10001.0,2.463,5.343999999999999,2.367,10001.0,2.462,5.376,2.368,10001.0,2.467,8.032,2.368,10001.0,4.161000,4.800000,4.064000
output_gpu,transformer_bs8,Fused_Reciprocal_Mul_Mul_split_12171752079749060903_kernel,1001.0,288.721,295.1,287.645,1001.0,288.778,295.132,287.741,10001.0,288.959,295.194,287.547,10001.0,288.959,295.194,287.547,10001.0,288.976,294.6500000000001,287.675,10001.0,288.938,295.132,287.388,10001.0,290.861,295.45300000000003,289.565
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_fusion_13573898137213720216_kernel,1001.0,7.362999999999999,10.432,7.2,1001.0,5.6,6.656000000000001,5.44,10001.0,5.6,8.992,5.44,10001.0,5.6,8.992,5.44,10001.0,5.6,9.152,5.408,10001.0,10.115,11.136,8.992,10001.0,7.327000,8.448000,7.167000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_split_17945515954624892537_kernel,1001.0,2.624,5.568,2.496,1001.0,2.466,5.312,2.3680000000000003,10001.0,2.464,5.343999999999999,2.367,10001.0,2.464,5.343999999999999,2.367,10001.0,2.463,5.44,2.368,10001.0,2.466,5.6,2.367,10001.0,4.176000,4.768000,4.064000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_fusion_9680756840611995906_kernel,1001.0,3.528,6.528,3.36,1001.0,2.165,5.184,2.111,10001.0,2.147,4.96,2.08,10001.0,2.147,4.96,2.08,10001.0,2.147,5.088,2.08,10001.0,1.846,2.144,1.792,10001.0,3.802000,4.416000,3.743000
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_17292780458335767103_kernel,1001.0,13.073,17.855999999999998,12.608,1001.0,13.062,18.208,12.448,10001.0,13.068,18.112,12.32,10001.0,13.068,18.112,12.32,10001.0,13.252,18.175,12.671,10001.0,12.573,15.104,12.063,10001.0,15.731000,18.240000,14.624000
output_gpu,transformer_bs8,Fused_Cast_Reshape_Sub_Mul_split_15309577687452979626_kernel,1001.0,1.268,4.032,1.247,1001.0,1.266,1.664,1.247,10001.0,1.267,4.192,1.247,10001.0,1.267,4.192,1.247,10001.0,1.266,4.096,1.247,10001.0,1.266,4.0,1.247,10001.0,3.030000,3.552000,2.975000
output_gpu,transformer_bs8,Fused_LessEqual_Sub_LessEqual_LogicalOr_Select_Mul_Maximum_Select_fusion_16815240076823420497_kernel,1001.0,1.808,4.064,1.791,1001.0,1.808,4.16,1.791,10001.0,1.805,4.192,1.791,10001.0,1.805,4.192,1.791,10001.0,1.805,4.224,1.791,10001.0,1.805,4.064,1.791,10001.0,3.396000,4.417000,3.327000
output_gpu,transformer_bs8,Fused_DropoutGrad_8989780959035910932_kernel,1001.0,4.941,8.064,4.863,1001.0,4.945,5.664,4.864,10001.0,4.935,8.288,4.863,10001.0,4.935,8.288,4.863,10001.0,4.941,8.192,4.863,10001.0,7.763,11.039,7.36,10001.0,6.756000,7.552000,6.592000
output_gpu,transformer_bs8,Fused_Cast_DropoutGrad_fusion_11706155091114503013_kernel,1001.0,2.86,5.76,2.752,1001.0,2.865,3.616,2.784,10001.0,2.852,5.76,2.752,10001.0,2.852,5.76,2.752,10001.0,2.852,5.823,2.752,10001.0,5.546,6.08,5.312,10001.0,4.619000,5.377000,4.512000
output_gpu,transformer_bs8,Fused_Cast_Add_Reshape_Cast_DropoutGrad_Mul_Reshape_fusion_15656847807407764271_kernel,1001.0,2.488,5.504,2.399,1001.0,2.444,3.104,2.367,10001.0,2.427,5.376,2.335,10001.0,2.427,5.376,2.335,10001.0,2.427,5.408,2.335,10001.0,2.391,8.448,2.336,10001.0,4.268000,4.928000,4.159000
output_gpu,transformer_bs8,Fused_Cast_Reshape_Sub_Mul_split_2542381849751661468_kernel,1001.0,1.3119999999999998,4.096,1.279,1001.0,1.31,1.696,1.279,10001.0,1.3090000000000002,4.16,1.279,10001.0,1.3090000000000002,4.16,1.279,10001.0,1.309,4.127,1.279,10001.0,1.308,1.76,1.279,10001.0,3.101000,3.616000,3.039000
output_gpu,transformer_bs8,Fused_Mul_Cast_Reshape_Sub_Mul_split_6278030272848470429_kernel,1001.0,2.285,5.151,2.24,1001.0,1.984,5.12,1.951,10001.0,2.052,4.96,2.015,10001.0,2.052,4.96,2.015,10001.0,2.054,5.12,2.015,10001.0,2.057,5.184,2.016,10001.0,3.829000,4.448000,3.775000
output_gpu,transformer_bs8,Fused_Cast_DropoutGrad_fusion_2387595705202539923_kernel,1001.0,2.3480000000000003,5.216,2.3040000000000003,1001.0,2.346,2.8160000000000003,2.3040000000000003,10001.0,2.343,5.312,2.303,10001.0,2.343,5.312,2.303,10001.0,2.344,5.472,2.303,10001.0,2.351,2.88,2.303,10001.0,4.431000,5.120000,4.351000
output_gpu,transformer_bs8,Fused_Cast_Add_DropoutGrad_fusion_3572849669607359649_kernel,1001.0,5.496,9.056,5.184,1001.0,5.457000000000001,6.688,5.152,10001.0,5.541,9.088,5.184,10001.0,5.541,9.088,5.184,10001.0,5.489,9.215,5.184,10001.0,9.811,10.528,8.608,10001.0,7.472000,8.832000,6.848000
output_gpu,transformer_bs8,Fused_Dropout_6728844861968013768_kernel,1001.0,2.883,5.728,2.815,1001.0,2.889,3.424,2.815,10001.0,2.9330000000000003,6.016,2.847,10001.0,2.9330000000000003,6.016,2.847,10001.0,2.931,5.92,2.816,10001.0,5.582,9.024,5.376,10001.0,4.753000,5.409000,4.640000
output_gpu,transformer_bs8,Fused_DropoutGrad_Cast_Mul_fusion_10879815597214713695_kernel,1001.0,1.507,4.224,1.471,1001.0,1.508,4.224,1.471,10001.0,1.504,4.287,1.471,10001.0,1.504,4.287,1.471,10001.0,1.504,4.448,1.471,10001.0,1.504,4.544,1.471,10001.0,3.257000,3.777000,3.199000
output_gpu,transformer_bs8,Fused_Cast_fusion_734865599527651727_kernel,1001.0,292.257,299.644,287.48400000000004,1001.0,293.349,988.916,287.389,10001.0,286.85200000000003,300.315,284.698,10001.0,286.85200000000003,300.315,284.698,10001.0,286.785,300.57,284.731,10001.0,321.95,325.853,313.884,10001.0,293.998,298.909,290.461
output_gpu,transformer_bs8,Fused_Cast_split_15199699009792768175_kernel,1001.0,1.237,3.936,1.215,1001.0,1.236,1.664,1.215,10001.0,1.237,6.976,1.215,10001.0,1.237,6.976,1.215,10001.0,1.235,4.032,1.215,10001.0,1.236,4.544,1.215,10001.0,2.972000,3.520000,2.911000
output_gpu,transformer_bs8,Fused_Cast_fusion_10361145232087373479_kernel,1001.0,1.662,4.512,1.631,1001.0,1.658,2.048,1.631,10001.0,1.6569999999999998,4.6080000000000005,1.631,10001.0,1.6569999999999998,4.6080000000000005,1.631,10001.0,1.657,4.576,1.631,10001.0,1.66,2.08,1.631,10001.0,3.676000,4.384000,3.615000
output_gpu,transformer_bs8,Fused_Dropout_14764012639232757918_kernel,1001.0,4.678,7.744,4.6080000000000005,1001.0,4.672,7.744,4.607,10001.0,4.74,7.936,4.64,10001.0,4.74,7.936,4.64,10001.0,4.745,8.096,4.671,10001.0,9.705,12.895,9.279,10001.0,6.940000,9.312000,6.847000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_3792933777995419853_kernel,1001.0,10.711,13.184,10.08,1001.0,10.964,11.808,10.464,10001.0,10.961,13.344,10.272,10001.0,10.961,13.344,10.272,10001.0,10.95,13.663,10.271,10001.0,10.962,11.936,10.176,10001.0,12.944000,14.400000,12.032000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_16281249836022149518_kernel,1001.0,3.161,6.016,3.104,1001.0,2.507,3.136,2.464,10001.0,2.503,8.992,2.463,10001.0,2.503,8.992,2.463,10001.0,2.501,5.44,2.463,10001.0,2.504,3.136,2.463,10001.0,4.176000,5.025000,4.127000
output_gpu,transformer_bs8,Fused_Reshape_Cast_fusion_16864097049918889730_kernel,1001.0,2.121,5.12,2.016,1001.0,2.096,4.959,1.984,10001.0,2.084,4.96,1.984,10001.0,2.084,4.96,1.984,10001.0,2.083,5.088,1.984,10001.0,1.97,5.248,1.92,10001.0,4.162000,5.120000,4.000000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_8942913306219642117_kernel,1001.0,3.707,6.688,3.552,1001.0,3.828,4.224,3.711,10001.0,3.821,6.688,3.68,10001.0,3.821,6.688,3.68,10001.0,3.822,6.719,3.68,10001.0,3.47,3.872,3.359,10001.0,5.500000,6.048000,5.312000
output_gpu,transformer_bs8,Fused_Add_RealDiv_RealDiv_split_5251982374815163933_kernel,1001.0,1.809,4.064,1.791,1001.0,1.812,3.936,1.791,10001.0,1.806,4.064,1.791,10001.0,1.806,4.064,1.791,10001.0,1.806,4.16,1.791,10001.0,1.811,3.936,1.791,10001.0,3.409000,4.352000,3.328000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_6761606405506234374_kernel,1001.0,2.747,5.568,2.688,1001.0,2.25,5.28,2.207,10001.0,2.241,9.344,2.207,10001.0,2.241,9.344,2.207,10001.0,2.24,5.28,2.207,10001.0,2.032,9.376,1.983,10001.0,4.267000,4.768000,4.191000
output_gpu,transformer_bs8,Fused_Cast_Reciprocal_Mul_Mul_split_6555134534471984018_kernel,1001.0,3.075,5.92,2.944,1001.0,3.077,3.584,2.944,10001.0,3.0660000000000003,6.016,2.943,10001.0,3.0660000000000003,6.016,2.943,10001.0,3.073,6.144,2.944,10001.0,2.63,3.136,2.527,10001.0,4.863000,5.536000,4.671000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_12508421897988650693_kernel,1001.0,2.727,5.5360000000000005,2.656,1001.0,2.193,2.592,2.144,10001.0,2.191,5.504,2.175,10001.0,2.191,5.504,2.175,10001.0,2.191,5.407,2.144,10001.0,2.197,2.56,2.175,10001.0,3.930000,4.736000,3.840000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_2475909425951335001_kernel,1001.0,12.363,16.096,11.744000000000002,1001.0,12.672,13.728,12.127,10001.0,12.658,16.384,11.808,10001.0,12.658,16.384,11.808,10001.0,12.641,16.192,11.968,10001.0,12.652,16.608,11.904,10001.0,14.872000,16.544000,13.919000
output_gpu,transformer_bs8,Fused_Sub_Mul_Cast_Mul_fusion_10477808415663360093_kernel,1001.0,2.077,4.896,2.047,1001.0,2.077,2.528,2.047,10001.0,2.074,4.992,2.047,10001.0,2.074,4.992,2.047,10001.0,2.073,4.992,2.047,10001.0,2.077,5.28,2.047,10001.0,4.116000,4.768000,4.032000
output_gpu,transformer_bs8,Fused_Cast_fusion_13287859665711025656_kernel,1001.0,6.376,9.663,6.207000000000001,1001.0,6.316,7.295,6.144,10001.0,6.3870000000000005,9.6,6.176,10001.0,6.3870000000000005,9.6,6.176,10001.0,6.368,9.632,6.176,10001.0,8.148000000000001,9.248,7.776,10001.0,8.332000,9.504000,7.967000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_3982170492620388206_kernel,1001.0,4.093999999999999,7.167999999999999,3.936,1001.0,4.312,4.736000000000001,4.064,10001.0,4.303,7.392,4.032,10001.0,4.303,7.392,4.032,10001.0,4.301,7.264,4.063,10001.0,3.909,4.224,3.648,10001.0,6.049000,6.976000,5.055000
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_5617039775742171995_kernel,1001.0,2.844,5.792000000000001,2.752,1001.0,2.849,5.696000000000001,2.752,10001.0,2.843,5.792000000000001,2.7510000000000003,10001.0,2.843,5.792000000000001,2.7510000000000003,10001.0,2.84,5.696,2.751,10001.0,2.852,6.048,2.752,10001.0,4.702000,5.376000,4.575000
output_gpu,transformer_bs8,Fused_Add_split_3360788168788334040_kernel,1001.0,2.346,5.28,2.272,1001.0,2.336,2.944,2.272,10001.0,2.332,5.376,2.271,10001.0,2.332,5.376,2.271,10001.0,2.332,5.408,2.271,10001.0,3.397,3.968,3.232,10001.0,4.172000,4.960000,4.063000
output_gpu,transformer_bs8,Fused_Add_split_9389037829819612683_kernel,1001.0,2.137,4.992,2.111,1001.0,2.14,2.656,2.111,10001.0,2.133,5.12,2.111,10001.0,2.133,5.12,2.111,10001.0,2.133,5.087,2.111,10001.0,2.079,2.592,2.047,10001.0,4.059000,4.705000,3.999000
output_gpu,transformer_bs8,Fused_Cast_fusion_4713347921750216747_kernel,1001.0,1.633,4.48,1.599,1001.0,1.63,2.048,1.599,10001.0,1.629,4.512,1.599,10001.0,1.629,4.512,1.599,10001.0,1.629,4.576,1.599,10001.0,1.63,4.352,1.599,10001.0,3.403000,3.968000,3.327000
output_gpu,transformer_bs8,Fused_DropoutGrad_Cast_Mul_fusion_3851206806124328305_kernel,1001.0,23.629,29.6,23.007,1001.0,23.657,29.376,23.008000000000003,10001.0,23.615,29.504,22.783,10001.0,23.615,29.504,22.783,10001.0,23.579,30.303,22.752,10001.0,22.677,29.407,21.856,10001.0,25.697000,29.888000,24.800000
output_gpu,transformer_bs8,Fused_Sub_Mul_Cast_Mul_fusion_9387468554979245990_kernel,1001.0,2.699,5.856,2.623,1001.0,2.465,5.5360000000000005,2.3680000000000003,10001.0,2.458,5.5360000000000005,2.3680000000000003,10001.0,2.458,5.5360000000000005,2.3680000000000003,10001.0,2.458,5.6,2.367,10001.0,2.465,6.08,2.368,10001.0,4.211000,4.832000,4.127000
output_gpu,transformer_bs8,Fused_Mul_Cast_Reshape_Sub_Mul_split_6444264162905663730_kernel,1001.0,1.394,4.128,1.375,1001.0,1.528,1.888,1.503,10001.0,1.423,4.223,1.376,10001.0,1.423,4.223,1.376,10001.0,1.423,4.352,1.407,10001.0,1.423,4.48,1.407,10001.0,3.229000,3.745000,3.167000
output_gpu,transformer_bs8,Fused_Cast_fusion_10676867479163083327_kernel,1001.0,111.333,117.823,108.702,1001.0,111.727,118.111,111.102,10001.0,109.188,116.99,107.581,10001.0,109.188,116.99,107.581,10001.0,108.943,117.181,107.55,10001.0,121.189,124.575,116.222,10001.0,112.993,120.159,111.743
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_8294588600122055599_kernel,1001.0,1.409,4.32,1.375,1001.0,1.408,4.288,1.375,10001.0,1.406,4.352,1.375,10001.0,1.406,4.352,1.375,10001.0,1.406,4.384,1.375,10001.0,1.406,4.768,1.375,10001.0,3.195000,4.160000,3.135000
output_gpu,transformer_bs8,Fused_Dropout_Add_Cast_fusion_7081433273824214948_kernel,1001.0,2.962,5.824,2.88,1001.0,2.971,3.584,2.88,10001.0,2.957,8.96,2.879,10001.0,2.957,8.96,2.879,10001.0,2.957,6.176,2.879,10001.0,5.146,8.352,4.896,10001.0,4.714000,5.312000,4.576000
output_gpu,transformer_bs8,Fused_Mul_Cast_Reshape_Sub_Mul_split_6132005880152471918_kernel,1001.0,1.5219999999999998,4.384,1.503,1001.0,1.579,4.288,1.536,10001.0,1.611,4.416,1.567,10001.0,1.611,4.416,1.567,10001.0,1.611,4.448,1.567,10001.0,1.61,4.8,1.567,10001.0,3.378000,3.935000,3.327000
output_gpu,transformer_bs8,Fused_Cast_fusion_16792286249585388607_kernel,1001.0,2.388,5.343999999999999,2.3040000000000003,1001.0,2.384,2.912,2.3040000000000003,10001.0,2.382,5.28,2.303,10001.0,2.382,5.28,2.303,10001.0,2.383,5.344,2.271,10001.0,3.517,6.272,3.391,10001.0,4.150000,4.768000,4.031000
output_gpu,transformer_bs8,Fused_Cast_Dropout_fusion_433885159236132084_kernel,1001.0,1.5530000000000002,4.383,1.535,1001.0,1.5490000000000002,1.984,1.535,10001.0,1.568,4.288,1.535,10001.0,1.568,4.288,1.535,10001.0,1.568,4.32,1.535,10001.0,1.567,1.952,1.535,10001.0,3.274000,3.808000,3.200000
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_5995407182487148599_kernel,1001.0,13.07,17.952,12.48,1001.0,13.057,18.048,12.576,10001.0,13.041,18.303,12.224,10001.0,13.041,18.303,12.224,10001.0,12.992,18.304,12.16,10001.0,13.082999999999998,18.08,12.256,10001.0,15.043000000000001,17.631999999999998,13.824
output_gpu,transformer_bs8,Fused_BroadcastTo_Mul_Neg_Reshape_BroadcastTo_Mul_split_5924805340053628136_kernel,1001.0,227.633,232.733,225.789,1001.0,203.661,207.453,201.757,10001.0,203.627,207.932,201.66,10001.0,203.627,207.932,201.66,10001.0,203.55900000000003,208.828,201.66,10001.0,203.631,207.293,201.662,10001.0,205.33599999999998,208.671,203.774
output_gpu,transformer_bs8,Fused_Cast_Add_Reshape_Cast_DropoutGrad_Mul_Reshape_fusion_15361860677943940007_kernel,1001.0,10.841,12.255999999999998,10.208,1001.0,10.606,12.928,10.239,10001.0,10.438,12.767,9.855,10001.0,10.438,12.767,9.855,10001.0,10.457,12.544,10.016,10001.0,9.447,11.168,8.64,10001.0,12.693000,14.208000,11.392000
output_gpu,transformer_bs8,Fused_Cast_Add_DropoutGrad_fusion_17569034922758395143_kernel,1001.0,1.911,4.703,1.887,1001.0,1.911,2.3040000000000003,1.887,10001.0,1.908,4.799,1.887,10001.0,1.908,4.799,1.887,10001.0,1.908,4.767,1.887,10001.0,1.909,2.304,1.887,10001.0,3.654000,4.224000,3.615000
output_gpu,transformer_bs8,Fused_Cast_split_6965912970228498022_kernel,1001.0,1.435,3.968,1.4069999999999998,1001.0,1.4340000000000002,1.664,1.4069999999999998,10001.0,1.433,4.064,1.4069999999999998,10001.0,1.433,4.064,1.4069999999999998,10001.0,1.433,4.32,1.407,10001.0,1.434,8.896,1.407,10001.0,3.053000,3.488000,3.007000
output_gpu,transformer_bs8,Fused_BroadcastTo_Mul_Neg_Reshape_BroadcastTo_Mul_split_17848132309943159352_kernel,1001.0,111.419,116.35,110.494,1001.0,102.315,107.262,101.471,10001.0,102.317,108.286,101.31,10001.0,102.317,108.286,101.31,10001.0,102.31,107.966,101.214,10001.0,102.309,106.143,101.374,10001.0,104.56400000000001,107.903,103.456
output_gpu,transformer_bs8,Fused_Sub_Mul_Cast_Mul_fusion_16439948660347314088_kernel,1001.0,1.349,4.192,1.3119999999999998,1001.0,1.594,4.352,1.567,10001.0,1.591,4.383,1.567,10001.0,1.591,4.383,1.567,10001.0,1.591,4.416,1.536,10001.0,1.591,4.672,1.567,10001.0,3.320000,3.840000,3.263000
output_gpu,transformer_bs8,Fused_BroadcastTo_Mul_Neg_Reshape_BroadcastTo_Mul_split_5622522712001701037_kernel,1001.0,166.947,171.549,165.885,1001.0,153.312,761.1110000000001,151.71,10001.0,152.891,157.98100000000005,151.613,10001.0,152.891,157.98100000000005,151.613,10001.0,152.861,157.98100000000005,151.485,10001.0,153.228,868.95,151.48700000000002,10001.0,154.821,160.159,153.567
output_gpu,transformer_bs8,Fused_Cast_Add_Reshape_Cast_DropoutGrad_Mul_Reshape_fusion_1216689417256757215_kernel,1001.0,1.693,4.544,1.632,1001.0,1.851,2.464,1.823,10001.0,1.817,4.64,1.791,10001.0,1.817,4.64,1.791,10001.0,1.816,4.768,1.791,10001.0,1.647,2.08,1.6,10001.0,3.574000,4.192000,3.488000
output_gpu,transformer_bs8,Fused_Dropout_Add_Cast_fusion_14445028853427914389_kernel,1001.0,12.285,15.616,11.648,1001.0,12.282,13.28,11.711,10001.0,12.332,15.872,11.584,10001.0,12.332,15.872,11.584,10001.0,12.334,16.0,11.647,10001.0,13.529,19.104,12.255,10001.0,14.256000,16.128000,13.471000
output_gpu,transformer_bs8,Fused_Cast_fusion_16205283870040168254_kernel,1001.0,3.539,6.496,3.487,1001.0,3.565,6.976,3.488,10001.0,3.532,6.752000000000001,3.455,10001.0,3.532,6.752000000000001,3.455,10001.0,3.532,6.72,3.455,10001.0,2.898,5.696,2.688,10001.0,5.270000,5.888000,5.183000
output_gpu,transformer_bs8,Fused_Cast_Add_DropoutGrad_fusion_14931130129182221035_kernel,1001.0,2.774,5.664,2.688,1001.0,2.386,2.944,2.3040000000000003,10001.0,2.774,5.76,2.656,10001.0,2.774,5.76,2.656,10001.0,2.771,5.696,2.656,10001.0,4.039,4.416,3.808,10001.0,4.590000,5.216000,4.448000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_fusion_1690109525745761376_kernel,1001.0,9.553,11.648,9.151,1001.0,9.085,13.12,8.736,10001.0,9.063,13.216,8.671,10001.0,9.063,13.216,8.671,10001.0,9.05,13.824000000000002,8.671,10001.0,8.97,13.184,7.968,10001.0,11.782000,13.536000,11.103000
output_gpu,transformer_bs8,Fused_Cast_Dropout_fusion_15879854922022882009_kernel,1001.0,3.0410000000000004,6.08,2.944,1001.0,3.044,3.584,2.975,10001.0,3.25,6.303999999999999,3.072,10001.0,3.25,6.303999999999999,3.072,10001.0,3.25,6.336,3.072,10001.0,3.252,3.808,3.072,10001.0,4.907000,5.728000,4.767000
output_gpu,transformer_bs8,Fused_Cast_DropoutGrad_fusion_15090104755473629222_kernel,1001.0,2.604,5.5360000000000005,2.527,1001.0,2.603,3.104,2.496,10001.0,2.6010000000000004,5.632000000000001,2.495,10001.0,2.6010000000000004,5.632000000000001,2.495,10001.0,2.601,5.663,2.496,10001.0,4.583,5.248,4.32,10001.0,4.724000,5.376000,4.607000
output_gpu,transformer_bs8,Fused_Cast_Reshape_Sub_Mul_split_4795476886497233086_kernel,1001.0,1.259,3.968,1.247,1001.0,1.256,1.6,1.247,10001.0,1.256,4.128,1.216,10001.0,1.256,4.128,1.216,10001.0,1.256,4.032,1.216,10001.0,1.262,3.968,1.247,10001.0,3.000000,3.552000,2.943000
output_gpu,transformer_bs8,Fused_Cast_Reciprocal_Mul_Mul_split_5127564958387223209_kernel,1001.0,15.275,21.471,14.528,1001.0,15.272,19.135,14.56,10001.0,15.223,21.152,14.272,10001.0,15.223,21.152,14.272,10001.0,15.263,21.343,14.432,10001.0,15.103,19.167,14.239,10001.0,17.425000,21.024000,16.288000
output_gpu,transformer_bs8,Fused_Cast_fusion_14535054222874744143_kernel,1001.0,2.088,4.992,1.984,1001.0,2.085,2.464,1.984,10001.0,2.085,4.928,1.983,10001.0,2.085,4.928,1.983,10001.0,2.083,4.928,1.984,10001.0,1.97,2.56,1.919,10001.0,3.844000,4.415000,3.712000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_Dropout_Add_Cast_fusion_8002991998771687003_kernel,1001.0,4.342,7.392,4.192,1001.0,4.602,5.152,4.448,10001.0,4.593999999999999,7.648,4.416,10001.0,4.593999999999999,7.648,4.416,10001.0,4.594,7.68,4.416,10001.0,4.167,9.312,4.0,10001.0,6.750000,7.264000,6.496000
output_gpu,transformer_bs8,Fused_Reshape_Cast_fusion_5924342323058826695_kernel,1001.0,3.699,6.944,3.647,1001.0,3.724,6.464,3.648,10001.0,3.532,6.527,3.4560000000000004,10001.0,3.532,6.527,3.4560000000000004,10001.0,3.532,6.656,3.456,10001.0,2.897,6.048,2.688,10001.0,5.745000,6.432000,5.632000
output_gpu,transformer_bs8,Fused_Cast_fusion_6685816579543234625_kernel,1001.0,39.265,44.927,38.816,1001.0,39.287,42.719,38.816,10001.0,39.032,44.99100000000001,38.207,10001.0,39.032,44.99100000000001,38.207,10001.0,39.032,45.119,38.239,10001.0,36.916,43.615,35.742999999999995,10001.0,41.817,45.696,40.575
output_gpu,transformer_bs8,Fused_DropoutGrad_4949663339776267250_kernel,1001.0,18.409,24.32,17.952,1001.0,18.41,24.16,17.984,10001.0,18.354,24.22300000000001,17.535999999999998,10001.0,18.354,24.22300000000001,17.535999999999998,10001.0,18.403,24.415,17.855,10001.0,21.257,26.111,19.648,10001.0,20.836000,24.288000,19.872000
output_gpu,transformer_bs8,Fused_DropoutGrad_13859649003237529132_kernel,1001.0,6.696000000000001,10.048,6.528,1001.0,6.733,10.368,6.528,10001.0,6.694,10.208,6.464,10001.0,6.694,10.208,6.464,10001.0,6.689,10.112,6.495,10001.0,9.912,13.088,9.472,10001.0,8.631000,9.856000,8.128000
output_gpu,transformer_bs8,Fused_Cast_Add_Reshape_Cast_DropoutGrad_Mul_Reshape_fusion_13664596767171641049_kernel,1001.0,2.814,6.016,2.688,1001.0,2.793,3.68,2.688,10001.0,2.785,6.016,2.687,10001.0,2.785,6.016,2.687,10001.0,2.786,8.672,2.687,10001.0,2.384,3.136,2.335,10001.0,4.590000,5.248000,4.448000
output_gpu,transformer_bs8,Fused_Cast_Add_DropoutGrad_fusion_15954869068047669516_kernel,1001.0,3.036,5.952000000000001,2.944,1001.0,2.609,3.3280000000000003,2.528,10001.0,3.029,6.112,2.944,10001.0,3.029,6.112,2.944,10001.0,3.03,5.984,2.944,10001.0,4.789,5.376,4.576,10001.0,4.837000,5.504000,4.735000
output_gpu,transformer_bs8,Fused_Cast_fusion_16139074981860551828_kernel,1001.0,76.935,82.431,75.935,1001.0,76.958,80.671,76.415,10001.0,76.183,82.94200000000002,75.487,10001.0,76.183,82.94200000000002,75.487,10001.0,76.24799999999998,82.43,75.391,10001.0,72.38799999999999,80.28699999999999,71.008,10001.0,78.68799999999999,82.623,78.016
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_13955133592113895256_kernel,1001.0,194.864,200.958,193.725,1001.0,195.111,200.894,193.565,10001.0,194.49400000000003,202.173,193.596,10001.0,194.49400000000003,202.173,193.596,10001.0,194.61900000000003,202.236,193.404,10001.0,194.663,202.429,193.15,10001.0,197.517,200.89399999999998,196.702
output_gpu,transformer_bs8,Fused_Cast_Dropout_fusion_1146851414665827198_kernel,1001.0,2.1590000000000003,4.992,2.111,1001.0,2.154,2.752,2.112,10001.0,2.29,5.216,2.239,10001.0,2.29,5.216,2.239,10001.0,2.291,5.28,2.239,10001.0,1.993,2.432,1.951,10001.0,4.025000,4.640000,3.936000
output_gpu,transformer_bs8,Fused_Add_fusion_13430267926236179158_kernel,1001.0,1.37,3.36,1.3430000000000002,1001.0,1.369,2.016,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.368,3.424,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.369,1.888,1.343,10001.0,3.012000,3.680000,2.943000
output_gpu,transformer_bs8,Fused_Cast_Reshape_Sub_Mul_split_11341266579542945644_kernel,1001.0,2.13,5.024,2.111,1001.0,2.13,2.624,2.111,10001.0,2.126,5.088,2.079,10001.0,2.126,5.088,2.079,10001.0,2.126,5.056,2.079,10001.0,2.13,5.119,2.08,10001.0,3.919000,4.896000,3.839000
output_gpu,transformer_bs8,Fused_Dropout_Add_Cast_fusion_8670351153427464688_kernel,1001.0,3.4760000000000004,6.56,3.3280000000000003,1001.0,3.479,6.624,3.3280000000000003,10001.0,3.471,6.784,3.3280000000000003,10001.0,3.471,6.784,3.3280000000000003,10001.0,3.468,6.88,3.328,10001.0,6.401,9.92,6.176,10001.0,5.163000,6.080000,5.055000
output_gpu,transformer_bs8,Fused_Cast_DropoutGrad_fusion_9543736008552606298_kernel,1001.0,4.591,7.712000000000001,4.512,1001.0,4.593,5.343999999999999,4.512,10001.0,4.589,7.776,4.511,10001.0,4.589,7.776,4.511,10001.0,4.584,7.6800000000000015,4.512,10001.0,9.565,10.432,8.096,10001.0,6.800000,7.584000,6.688000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_fusion_7826052084438599771_kernel,1001.0,4.9110000000000005,7.68,4.736000000000001,1001.0,3.247,3.712,3.168,10001.0,3.24,6.144,3.167,10001.0,3.24,6.144,3.167,10001.0,3.239,6.112,3.167,10001.0,2.939,3.328,2.879,10001.0,4.970000,5.504000,4.864000
output_gpu,transformer_bs8,Fused_Cast_split_14882105803640499118_kernel,1001.0,1.237,3.936,1.215,1001.0,1.237,3.968,1.215,10001.0,1.235,4.0,1.215,10001.0,1.235,4.0,1.215,10001.0,1.235,4.032,1.215,10001.0,1.064,1.44,1.023,10001.0,2.952000,3.456000,2.911000
output_gpu,transformer_bs8,Fused_Cast_split_6495672851799929469_kernel,1001.0,1.243,3.935,1.215,1001.0,1.24,1.6,1.215,10001.0,1.241,4.032,1.215,10001.0,1.241,4.032,1.215,10001.0,1.241,4.032,1.215,10001.0,1.24,4.224,1.215,10001.0,2.972000,3.488000,2.911000
output_gpu,transformer_bs8,Fused_Cast_fusion_2942684815373358502_kernel,1001.0,2.069,4.832,2.047,1001.0,1.78,2.144,1.759,10001.0,2.066,4.928,2.016,10001.0,2.066,4.928,2.016,10001.0,2.067,4.96,2.016,10001.0,2.071,2.56,2.047,10001.0,3.823000,4.481000,3.775000
output_gpu,transformer_bs8,Fused_Sub_Mul_Cast_Mul_fusion_12215741611476392021_kernel,1001.0,15.197,20.032,14.848,1001.0,15.28,20.512,14.752,10001.0,15.235,20.256,14.656,10001.0,15.235,20.256,14.656,10001.0,15.27,20.416,14.72,10001.0,15.258,20.896,14.656,10001.0,17.529000,21.312000,16.352000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_fusion_7359470329768724585_kernel,1001.0,1.315,4.16,1.28,1001.0,1.489,4.192,1.471,10001.0,1.4880000000000002,4.256,1.471,10001.0,1.4880000000000002,4.256,1.471,10001.0,1.488,4.288,1.471,10001.0,1.488,4.512,1.471,10001.0,3.251000,3.744000,3.167000
output_gpu,transformer_bs8,Fused_Mul_Cast_Reshape_Sub_Mul_split_9840965998151121184_kernel,1001.0,1.3130000000000002,4.192,1.279,1001.0,1.482,4.192,1.471,10001.0,1.319,4.095,1.28,10001.0,1.319,4.095,1.28,10001.0,1.319,4.288,1.28,10001.0,1.319,4.32,1.28,10001.0,3.110000,4.512000,3.040000
output_gpu,transformer_bs8,Fused_Reshape_Cast_fusion_5105613123194475780_kernel,1001.0,2.343,5.312,2.271,1001.0,2.331,2.8160000000000003,2.24,10001.0,2.316,5.184,2.239,10001.0,2.316,5.184,2.239,10001.0,2.316,5.184,2.239,10001.0,2.19,5.856,2.144,10001.0,4.414000,5.088000,4.319000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_fusion_9116889263416481309_kernel,1001.0,6.148,8.992,6.016,1001.0,4.877,8.416,4.704,10001.0,4.845,8.0,4.671,10001.0,4.845,8.0,4.671,10001.0,4.844,7.936,4.64,10001.0,7.243,7.904,6.912,10001.0,6.528000,7.584000,6.304000
output_gpu,transformer_bs8,Fused_Cast_fusion_17970026256896640430_kernel,1001.0,75.117,80.383,74.495,1001.0,75.102,77.695,74.367,10001.0,73.503,80.639,72.09400000000002,10001.0,73.503,80.639,72.09400000000002,10001.0,73.428,80.60600000000001,71.87,10001.0,81.167,84.44699999999999,77.663,10001.0,76.503,80.031,75.67999999999999
output_gpu,transformer_bs8,Fused_Cast_fusion_17924798252532011620_kernel,1001.0,6.375,9.472,6.175,1001.0,6.417000000000001,9.76,6.239,10001.0,6.385,9.632,6.176,10001.0,6.385,9.632,6.176,10001.0,6.388999999999999,10.048,6.176,10001.0,8.15,8.831999999999999,7.807,10001.0,8.357000,9.696000,7.968000
output_gpu,transformer_bs8,Fused_Cast_fusion_18216012640326669745_kernel,1001.0,148.166,153.886,147.262,1001.0,144.165,146.846,143.19799999999998,10001.0,144.627,153.565,142.78099999999995,10001.0,144.627,153.565,142.78099999999995,10001.0,144.51600000000005,153.43699999999995,142.71800000000005,10001.0,161.615,165.534,156.254,10001.0,148.665,151.807,147.678
output_gpu,transformer_bs8,Fused_Dropout_Add_Cast_fusion_11127080223908801270_kernel,1001.0,2.495,5.408,2.432,1001.0,2.502,3.072,2.463,10001.0,2.506,8.703,2.432,10001.0,2.506,8.703,2.432,10001.0,2.503,5.504,2.432,10001.0,2.507,5.76,2.463,10001.0,4.279000,4.928000,4.191000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_split_8118040641341421899_kernel,1001.0,1.998,4.928,1.952,1001.0,1.896,4.736000000000001,1.824,10001.0,1.893,4.8,1.824,10001.0,1.893,4.8,1.824,10001.0,1.893,4.896,1.824,10001.0,1.895,5.184,1.824,10001.0,3.663000,4.257000,3.615000
output_gpu,transformer_bs8,Fused_Dropout_3544029600254149266_kernel,1001.0,12.536,14.976,11.84,1001.0,12.515999999999998,15.52,11.776,10001.0,12.608,15.263,11.903,10001.0,12.608,15.263,11.903,10001.0,12.58,14.944,11.616,10001.0,16.379,18.336,14.911,10001.0,14.511000,15.936000,13.632000
output_gpu,transformer_bs8,Fused_Mul_Add_Cast_fusion_11168657805263854442_kernel,1001.0,2.0,4.896,1.952,1001.0,1.896,4.864,1.855,10001.0,1.89,4.768,1.824,10001.0,1.89,4.768,1.824,10001.0,1.891,5.12,1.824,10001.0,1.892,5.184,1.824,10001.0,3.941000,4.543000,3.871000
output_gpu,transformer_bs8,Fused_Mul_fusion_5218434561050803946_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.374,4.896,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.369,6.72,1.343,10001.0,3.018000,3.744000,2.944000
output_gpu,transformer_bs8,Fused_Mul_Cast_Reshape_Sub_Mul_split_4561109844344657348_kernel,1001.0,1.319,4.032,1.279,1001.0,1.526,4.224,1.503,10001.0,1.334,4.128,1.311,10001.0,1.334,4.128,1.311,10001.0,1.334,4.128,1.311,10001.0,1.334,4.448,1.248,10001.0,3.087000,3.872000,3.008000
output_gpu,transformer_bs8,Fused_Cast_BiasAdd_fusion_8962117738945412772_kernel,1001.0,17.452,20.671,17.152,1001.0,14.886,17.567999999999998,14.432,10001.0,14.633,19.904,13.728,10001.0,14.633,19.904,13.728,10001.0,14.888,19.968,14.304,10001.0,17.343,19.616000000000003,16.096,10001.0,16.805000,19.520000,16.096000
output_gpu,transformer_bs8,Fused_Reshape_Cast_fusion_14440924142704444280_kernel,1001.0,1.459,4.288,1.439,1001.0,1.654,4.384,1.631,10001.0,1.629,4.544,1.599,10001.0,1.629,4.544,1.599,10001.0,1.629,4.48,1.599,10001.0,1.63,4.671,1.599,10001.0,3.409000,3.968000,3.328000
output_gpu,transformer_bs8,Fused_Cast_fusion_17364629653806350082_kernel,1001.0,114.455,120.286,113.183,1001.0,114.695,118.079,114.175,10001.0,113.565,120.733,112.702,10001.0,113.565,120.733,112.702,10001.0,113.58,120.51,112.67,10001.0,107.941,114.303,106.623,10001.0,116.69200000000001,121.05499999999999,115.711
output_gpu,transformer_bs8,Fused_Mul_fusion_13438744724785477572_kernel,1001.0,1.371,3.36,1.3430000000000002,1001.0,1.37,3.36,1.3430000000000002,10001.0,1.369,3.455,1.3430000000000002,10001.0,1.369,3.455,1.3430000000000002,10001.0,1.369,3.424,1.343,10001.0,1.369,3.36,1.343,10001.0,3.029000,4.032000,2.943000
output_gpu,transformer_bs8,Fused_Cast_fusion_9192843148205968599_kernel,1001.0,3.641,6.496,3.584,1001.0,3.129,3.584,3.072,10001.0,3.636,6.656000000000001,3.583,10001.0,3.636,6.656000000000001,3.583,10001.0,3.635,6.592,3.583,10001.0,4.672,5.152,4.448,10001.0,5.370000,6.016000,5.279000
output_gpu,transformer_bs8,Fused_DropoutGrad_Cast_Mul_fusion_8464255796101984825_kernel,1001.0,1.934,4.704,1.888,1001.0,1.936,4.832,1.919,10001.0,1.935,9.568,1.887,10001.0,1.935,9.568,1.887,10001.0,1.931,4.992,1.887,10001.0,1.933,5.024,1.888,10001.0,3.692000,4.288000,3.616000
output_gpu,transformer_bs8,Fused_Mul_Greater_OnesLike_Select_Sqrt_Select_Maximum_Div_fusion_13570414355987636576_kernel,1001.0,1.354,4.192,1.3119999999999998,1001.0,1.354,4.16,1.3119999999999998,10001.0,1.352,4.256,1.3119999999999998,10001.0,1.352,4.256,1.3119999999999998,10001.0,1.351,4.256,1.312,10001.0,1.352,4.544,1.311,10001.0,3.093000,3.808000,3.039000
output_gpu,transformer_bs8,Fused_Reciprocal_Mul_Mul_split_1889642591342779156_kernel,1001.0,1.429,4.288,1.4069999999999998,1001.0,1.429,4.256,1.4069999999999998,10001.0,1.425,4.352,1.4069999999999998,10001.0,1.425,4.352,1.4069999999999998,10001.0,1.425,4.32,1.407,10001.0,1.224,1.664,1.184,10001.0,3.133000,3.776000,3.071000
output_gpu,transformer_bs8,Fused_BroadcastTo_Mul_Neg_Reshape_BroadcastTo_Mul_split_10161381592862811965_kernel,1001.0,460.851,465.146,458.426,1001.0,405.156,409.275,402.779,10001.0,405.644,410.04,402.777,10001.0,405.644,410.04,402.777,10001.0,405.644,410.52,402.681,10001.0,405.851,1275.538,402.78,10001.0,406.95,411.964,404.82800000000003
output_gpu,transformer_bs8,Fused_Cast_fusion_8860067908337289260_kernel,1001.0,302.005,308.476,299.836,1001.0,301.117,309.532,299.03700000000003,10001.0,299.85400000000004,309.05,298.874,10001.0,299.85400000000004,309.05,298.874,10001.0,299.879,308.5379999999999,298.683,10001.0,285.36,291.133,283.677,10001.0,304.19,307.64599999999996,303.06899999999996
output_gpu,transformer_bs8,Fused_Cast_fusion_1592908079013826888_kernel,1001.0,2.324,5.216,2.24,1001.0,2.319,2.752,2.24,10001.0,2.316,5.184,2.239,10001.0,2.316,5.184,2.239,10001.0,2.316,5.408,2.239,10001.0,2.185,5.056,2.144,10001.0,4.071000,4.737000,3.967000
output_gpu,transformer_bs8,Fused_Cast_Dropout_fusion_17568105015633329930_kernel,1001.0,1.924,4.768,1.887,1001.0,1.921,2.336,1.887,10001.0,1.955,4.896,1.919,10001.0,1.955,4.896,1.919,10001.0,1.955,4.832,1.919,10001.0,1.957,2.368,1.919,10001.0,3.660000,4.320000,3.615000
output_gpu,transformer_bs8,Fused_Cast_fusion_14052062154174736980_kernel,1001.0,2.117,5.056,2.079,1001.0,2.116,2.688,2.08,10001.0,2.114,5.056,2.079,10001.0,2.114,5.056,2.079,10001.0,2.113,5.056,2.079,10001.0,2.125,7.072,2.08,10001.0,3.891000,4.576000,3.808000
output_gpu,transformer_bs8,Fused_Add_split_13350401852779236455_kernel,1001.0,2.128,4.96,2.048,1001.0,2.123,2.688,2.048,10001.0,2.122,5.184,2.015,10001.0,2.122,5.184,2.015,10001.0,2.123,5.119,2.016,10001.0,3.062,3.52,2.816,10001.0,3.983000,4.864000,3.903000
output_gpu,transformer_bs8,Fused_Cast_fusion_74518656200978243_kernel,1001.0,2.148,5.12,2.048,1001.0,2.16,5.247999999999999,2.08,10001.0,2.143,5.152,2.048,10001.0,2.143,5.152,2.048,10001.0,2.145,9.792,2.048,10001.0,2.859,6.08,2.687,10001.0,3.923000,4.448000,3.776000
output_gpu,transformer_bs8,Fused_Cast_fusion_197229419439132631_kernel,1001.0,148.07399999999998,153.566,144.734,1001.0,144.023,146.878,142.782,10001.0,144.382,153.43699999999995,142.845,10001.0,144.382,153.43699999999995,142.845,10001.0,144.665,153.40599999999995,142.846,10001.0,161.781,165.14999999999998,158.942,10001.0,148.624,151.96699999999998,147.615
output_gpu,transformer_bs8,Fused_AssignAdd_17552493758390469362_kernel,1001.0,1.372,3.52,1.3430000000000002,1001.0,1.42,1.952,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.421,3.488,1.4069999999999998,10001.0,1.42,3.488,1.407,10001.0,1.388,1.92,1.375,10001.0,3.165000,3.871000,3.103000
output_gpu,transformer_bs8,Fused_Cast_Dropout_fusion_1148599074270147613_kernel,1001.0,18.126,22.368,17.472,1001.0,18.146,20.48,17.664,10001.0,18.39,22.976,17.535,10001.0,18.39,22.976,17.535,10001.0,18.432,22.943,17.632,10001.0,18.038,20.032,17.439,10001.0,20.481000,22.656000,19.552000
output_gpu,transformer_bs8,Fused_Cast_fusion_7226987304258350882_kernel,1001.0,152.555,157.566,151.966,1001.0,153.24200000000002,834.966,151.90200000000002,10001.0,150.88899999999995,157.885,150.077,10001.0,150.88899999999995,157.885,150.077,10001.0,150.834,158.077,149.94899999999998,10001.0,143.34,148.862,141.982,10001.0,153.85299999999998,158.207,153.055
output_gpu,transformer_bs8,Fused_Cast_DropoutGrad_fusion_5359054177573077531_kernel,1001.0,1.862,4.64,1.823,1001.0,1.86,2.272,1.824,10001.0,1.859,4.704,1.823,10001.0,1.859,4.704,1.823,10001.0,1.859,4.736,1.823,10001.0,1.862,2.272,1.823,10001.0,3.897000,4.544000,3.808000
output_gpu,transformer_bs8,Fused_Cast_Reciprocal_Mul_Mul_split_10242566393862639565_kernel,1001.0,1.484,4.48,1.471,1001.0,1.481,2.176,1.471,10001.0,1.48,4.5760000000000005,1.471,10001.0,1.48,4.5760000000000005,1.471,10001.0,1.48,4.576,1.471,10001.0,1.481,2.048,1.471,10001.0,3.421000,4.128000,3.359000
output_gpu,transformer_bs8,Fused_Cast_Reshape_Sub_Mul_split_5972016718924389897_kernel,1001.0,1.431,4.224,1.4069999999999998,1001.0,1.43,1.7919999999999998,1.4069999999999998,10001.0,1.429,4.352,1.4069999999999998,10001.0,1.429,4.352,1.4069999999999998,10001.0,1.429,4.224,1.407,10001.0,1.43,4.288,1.407,10001.0,3.203000,3.744000,3.135000
output_gpu,user_cases,Fused_BiasAdd_1551558231201032373_kernel,1001.0,13.236,15.808,12.8,1001.0,13.236,15.808,12.8,10001.0,12.621,18.176,11.903,10001.0,12.621,18.176,11.903,10001.0,12.622,17.663,11.872,10001.0,12.626,17.983,12.064,10001.0,14.857000000000001,17.919,14.239
output_gpu,user_cases,Fused_ReduceMean_728894389712972095_kernel,1001.0,41.782,53.183,41.215,1001.0,41.782,53.183,41.215,10001.0,41.927,54.399,40.895,10001.0,41.927,54.399,40.895,10001.0,42.18600000000001,53.535,40.927,10001.0,42.13,51.775,40.895,10001.0,45.108000000000004,54.783,43.679000
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_8198810867738950778_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,316.98,989.649,310.044,10001.0,334.708,362.653,330.524
output_gpu,yolov3_darknet,Fused_Reshape_Transpose_split_6170990274532565034_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,177.231,887.602,173.43800000000002,10001.0,190.101,207.23,186.494
output_gpu,yolov3_darknet,Fused_Add_Add_Add_Cast_Transpose_split_3265145179629954688_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,140.96,841.684,138.686,10001.0,143.60600000000002,151.038,141.247
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_13588083744242635116_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,144.93900000000002,835.86,143.133,10001.0,156.072,160.73499999999999,154.462
output_gpu,yolov3_darknet,Fused_Transpose_split_9540331858271689701_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,224.253,935.313,218.684,10001.0,244.91,248.09400000000002,243.485
output_gpu,yolov3_darknet,Fused_Transpose_split_10858175714249765537_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,114.652,133.182,110.654,10001.0,126.63499999999999,140.191,124.159
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_5767912037163420354_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,906.461,1601.447,902.098,10001.0,1003.342,1005.3030000000001,1001.7510000000001
output_gpu,yolov3_darknet,Fused_Reshape_Transpose_split_11055555196978473023_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,176.435,879.73,171.966,10001.0,188.73899999999998,193.56599999999997,185.79000000000002
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_17888158432536564356_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,152.291,767.765,151.93300000000002,10001.0,170.201,171.13400000000001,169.95
output_gpu,yolov3_darknet,Fused_Add_Add_Add_Cast_Transpose_split_1665896329323212233_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,689.005,1409.353,678.3249999999999,10001.0,680.9849999999999,689.785,674.49
output_gpu,yolov3_darknet,Fused_Mul_Maximum_Transpose_split_13074222870906220418_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,302.284,302.811,302.043,10001.0,336.05899999999997,336.669,335.837
output_gpu,yolov3_darknet,Fused_Transpose_split_15197132033665191594_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,111.032,790.388,107.998,10001.0,121.28999999999999,123.871,119.551
output_gpu,yolov3_darknet,Fused_Transpose_MaximumGrad_Mul_Transpose_Transpose_Add_fusion_14416323746107203464_kernel,,,,,,,,,,,,,,,,,,,,,10001.0,76.947,801.333,75.359,10001.0,84.423,89.407,82.975