{
"best_metric": 0.6078575555438837,
"best_model_checkpoint": "sai17/cards_bottom_right_swin-tiny-patch4-window7-224-finetuned-v2/checkpoint-33462",
"epoch": 29.98879342547628,
"eval_steps": 500,
"global_step": 40140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.2456402590931739e-07,
"loss": 2.3681,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.4912805181863477e-07,
"loss": 2.3624,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 3.7369207772795216e-07,
"loss": 2.3535,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 4.982561036372695e-07,
"loss": 2.3238,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 6.22820129546587e-07,
"loss": 2.2964,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 7.473841554559043e-07,
"loss": 2.2803,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 8.719481813652218e-07,
"loss": 2.235,
"step": 70
},
{
"epoch": 0.06,
"learning_rate": 9.96512207274539e-07,
"loss": 2.2036,
"step": 80
},
{
"epoch": 0.07,
"learning_rate": 1.1210762331838566e-06,
"loss": 2.1817,
"step": 90
},
{
"epoch": 0.07,
"learning_rate": 1.245640259093174e-06,
"loss": 2.1279,
"step": 100
},
{
"epoch": 0.08,
"learning_rate": 1.3702042850024913e-06,
"loss": 2.1105,
"step": 110
},
{
"epoch": 0.09,
"learning_rate": 1.4947683109118086e-06,
"loss": 2.0694,
"step": 120
},
{
"epoch": 0.1,
"learning_rate": 1.619332336821126e-06,
"loss": 2.053,
"step": 130
},
{
"epoch": 0.1,
"learning_rate": 1.7438963627304436e-06,
"loss": 2.0218,
"step": 140
},
{
"epoch": 0.11,
"learning_rate": 1.868460388639761e-06,
"loss": 2.0114,
"step": 150
},
{
"epoch": 0.12,
"learning_rate": 1.993024414549078e-06,
"loss": 1.9766,
"step": 160
},
{
"epoch": 0.13,
"learning_rate": 2.117588440458396e-06,
"loss": 1.9622,
"step": 170
},
{
"epoch": 0.13,
"learning_rate": 2.242152466367713e-06,
"loss": 1.9496,
"step": 180
},
{
"epoch": 0.14,
"learning_rate": 2.366716492277031e-06,
"loss": 1.8768,
"step": 190
},
{
"epoch": 0.15,
"learning_rate": 2.491280518186348e-06,
"loss": 1.8796,
"step": 200
},
{
"epoch": 0.16,
"learning_rate": 2.6158445440956654e-06,
"loss": 1.883,
"step": 210
},
{
"epoch": 0.16,
"learning_rate": 2.7404085700049827e-06,
"loss": 1.869,
"step": 220
},
{
"epoch": 0.17,
"learning_rate": 2.8649725959143e-06,
"loss": 1.8319,
"step": 230
},
{
"epoch": 0.18,
"learning_rate": 2.9895366218236172e-06,
"loss": 1.839,
"step": 240
},
{
"epoch": 0.19,
"learning_rate": 3.114100647732935e-06,
"loss": 1.8395,
"step": 250
},
{
"epoch": 0.19,
"learning_rate": 3.238664673642252e-06,
"loss": 1.7919,
"step": 260
},
{
"epoch": 0.2,
"learning_rate": 3.3632286995515695e-06,
"loss": 1.812,
"step": 270
},
{
"epoch": 0.21,
"learning_rate": 3.487792725460887e-06,
"loss": 1.7378,
"step": 280
},
{
"epoch": 0.22,
"learning_rate": 3.6123567513702045e-06,
"loss": 1.7971,
"step": 290
},
{
"epoch": 0.22,
"learning_rate": 3.736920777279522e-06,
"loss": 1.7779,
"step": 300
},
{
"epoch": 0.23,
"learning_rate": 3.861484803188839e-06,
"loss": 1.7522,
"step": 310
},
{
"epoch": 0.24,
"learning_rate": 3.986048829098156e-06,
"loss": 1.7473,
"step": 320
},
{
"epoch": 0.25,
"learning_rate": 4.110612855007474e-06,
"loss": 1.7602,
"step": 330
},
{
"epoch": 0.25,
"learning_rate": 4.235176880916792e-06,
"loss": 1.7355,
"step": 340
},
{
"epoch": 0.26,
"learning_rate": 4.359740906826109e-06,
"loss": 1.7457,
"step": 350
},
{
"epoch": 0.27,
"learning_rate": 4.484304932735426e-06,
"loss": 1.7418,
"step": 360
},
{
"epoch": 0.28,
"learning_rate": 4.608868958644744e-06,
"loss": 1.7143,
"step": 370
},
{
"epoch": 0.28,
"learning_rate": 4.733432984554062e-06,
"loss": 1.6792,
"step": 380
},
{
"epoch": 0.29,
"learning_rate": 4.8579970104633785e-06,
"loss": 1.6866,
"step": 390
},
{
"epoch": 0.3,
"learning_rate": 4.982561036372696e-06,
"loss": 1.73,
"step": 400
},
{
"epoch": 0.31,
"learning_rate": 5.107125062282013e-06,
"loss": 1.7063,
"step": 410
},
{
"epoch": 0.31,
"learning_rate": 5.231689088191331e-06,
"loss": 1.6531,
"step": 420
},
{
"epoch": 0.32,
"learning_rate": 5.356253114100648e-06,
"loss": 1.668,
"step": 430
},
{
"epoch": 0.33,
"learning_rate": 5.480817140009965e-06,
"loss": 1.6915,
"step": 440
},
{
"epoch": 0.34,
"learning_rate": 5.605381165919283e-06,
"loss": 1.6881,
"step": 450
},
{
"epoch": 0.34,
"learning_rate": 5.7299451918286e-06,
"loss": 1.6694,
"step": 460
},
{
"epoch": 0.35,
"learning_rate": 5.854509217737918e-06,
"loss": 1.6488,
"step": 470
},
{
"epoch": 0.36,
"learning_rate": 5.9790732436472345e-06,
"loss": 1.6445,
"step": 480
},
{
"epoch": 0.37,
"learning_rate": 6.103637269556552e-06,
"loss": 1.6573,
"step": 490
},
{
"epoch": 0.37,
"learning_rate": 6.22820129546587e-06,
"loss": 1.6781,
"step": 500
},
{
"epoch": 0.38,
"learning_rate": 6.352765321375187e-06,
"loss": 1.6435,
"step": 510
},
{
"epoch": 0.39,
"learning_rate": 6.477329347284504e-06,
"loss": 1.6165,
"step": 520
},
{
"epoch": 0.4,
"learning_rate": 6.601893373193822e-06,
"loss": 1.6274,
"step": 530
},
{
"epoch": 0.4,
"learning_rate": 6.726457399103139e-06,
"loss": 1.5762,
"step": 540
},
{
"epoch": 0.41,
"learning_rate": 6.851021425012457e-06,
"loss": 1.6139,
"step": 550
},
{
"epoch": 0.42,
"learning_rate": 6.975585450921774e-06,
"loss": 1.6278,
"step": 560
},
{
"epoch": 0.43,
"learning_rate": 7.100149476831091e-06,
"loss": 1.6368,
"step": 570
},
{
"epoch": 0.43,
"learning_rate": 7.224713502740409e-06,
"loss": 1.6018,
"step": 580
},
{
"epoch": 0.44,
"learning_rate": 7.349277528649726e-06,
"loss": 1.6139,
"step": 590
},
{
"epoch": 0.45,
"learning_rate": 7.473841554559044e-06,
"loss": 1.569,
"step": 600
},
{
"epoch": 0.46,
"learning_rate": 7.598405580468361e-06,
"loss": 1.5638,
"step": 610
},
{
"epoch": 0.46,
"learning_rate": 7.722969606377677e-06,
"loss": 1.6057,
"step": 620
},
{
"epoch": 0.47,
"learning_rate": 7.847533632286996e-06,
"loss": 1.5762,
"step": 630
},
{
"epoch": 0.48,
"learning_rate": 7.972097658196313e-06,
"loss": 1.6152,
"step": 640
},
{
"epoch": 0.49,
"learning_rate": 8.09666168410563e-06,
"loss": 1.5539,
"step": 650
},
{
"epoch": 0.49,
"learning_rate": 8.221225710014948e-06,
"loss": 1.5505,
"step": 660
},
{
"epoch": 0.5,
"learning_rate": 8.345789735924265e-06,
"loss": 1.6271,
"step": 670
},
{
"epoch": 0.51,
"learning_rate": 8.470353761833583e-06,
"loss": 1.5849,
"step": 680
},
{
"epoch": 0.52,
"learning_rate": 8.5949177877429e-06,
"loss": 1.5817,
"step": 690
},
{
"epoch": 0.52,
"learning_rate": 8.719481813652217e-06,
"loss": 1.6132,
"step": 700
},
{
"epoch": 0.53,
"learning_rate": 8.844045839561536e-06,
"loss": 1.5737,
"step": 710
},
{
"epoch": 0.54,
"learning_rate": 8.968609865470853e-06,
"loss": 1.5367,
"step": 720
},
{
"epoch": 0.55,
"learning_rate": 9.093173891380171e-06,
"loss": 1.5975,
"step": 730
},
{
"epoch": 0.55,
"learning_rate": 9.217737917289488e-06,
"loss": 1.5518,
"step": 740
},
{
"epoch": 0.56,
"learning_rate": 9.342301943198805e-06,
"loss": 1.5335,
"step": 750
},
{
"epoch": 0.57,
"learning_rate": 9.466865969108123e-06,
"loss": 1.5824,
"step": 760
},
{
"epoch": 0.58,
"learning_rate": 9.59142999501744e-06,
"loss": 1.6016,
"step": 770
},
{
"epoch": 0.58,
"learning_rate": 9.715994020926757e-06,
"loss": 1.5689,
"step": 780
},
{
"epoch": 0.59,
"learning_rate": 9.840558046836074e-06,
"loss": 1.591,
"step": 790
},
{
"epoch": 0.6,
"learning_rate": 9.965122072745393e-06,
"loss": 1.5611,
"step": 800
},
{
"epoch": 0.61,
"learning_rate": 1.008968609865471e-05,
"loss": 1.594,
"step": 810
},
{
"epoch": 0.61,
"learning_rate": 1.0214250124564026e-05,
"loss": 1.5732,
"step": 820
},
{
"epoch": 0.62,
"learning_rate": 1.0338814150473345e-05,
"loss": 1.5731,
"step": 830
},
{
"epoch": 0.63,
"learning_rate": 1.0463378176382662e-05,
"loss": 1.5574,
"step": 840
},
{
"epoch": 0.64,
"learning_rate": 1.0587942202291978e-05,
"loss": 1.5341,
"step": 850
},
{
"epoch": 0.64,
"learning_rate": 1.0712506228201295e-05,
"loss": 1.562,
"step": 860
},
{
"epoch": 0.65,
"learning_rate": 1.0837070254110614e-05,
"loss": 1.5615,
"step": 870
},
{
"epoch": 0.66,
"learning_rate": 1.096163428001993e-05,
"loss": 1.5494,
"step": 880
},
{
"epoch": 0.66,
"learning_rate": 1.1086198305929248e-05,
"loss": 1.5816,
"step": 890
},
{
"epoch": 0.67,
"learning_rate": 1.1210762331838566e-05,
"loss": 1.5571,
"step": 900
},
{
"epoch": 0.68,
"learning_rate": 1.1335326357747883e-05,
"loss": 1.6037,
"step": 910
},
{
"epoch": 0.69,
"learning_rate": 1.14598903836572e-05,
"loss": 1.525,
"step": 920
},
{
"epoch": 0.69,
"learning_rate": 1.1584454409566518e-05,
"loss": 1.532,
"step": 930
},
{
"epoch": 0.7,
"learning_rate": 1.1709018435475835e-05,
"loss": 1.5277,
"step": 940
},
{
"epoch": 0.71,
"learning_rate": 1.1833582461385152e-05,
"loss": 1.5249,
"step": 950
},
{
"epoch": 0.72,
"learning_rate": 1.1958146487294469e-05,
"loss": 1.5471,
"step": 960
},
{
"epoch": 0.72,
"learning_rate": 1.2082710513203788e-05,
"loss": 1.5541,
"step": 970
},
{
"epoch": 0.73,
"learning_rate": 1.2207274539113104e-05,
"loss": 1.5406,
"step": 980
},
{
"epoch": 0.74,
"learning_rate": 1.2331838565022421e-05,
"loss": 1.5822,
"step": 990
},
{
"epoch": 0.75,
"learning_rate": 1.245640259093174e-05,
"loss": 1.5309,
"step": 1000
},
{
"epoch": 0.75,
"learning_rate": 1.2580966616841055e-05,
"loss": 1.5405,
"step": 1010
},
{
"epoch": 0.76,
"learning_rate": 1.2705530642750373e-05,
"loss": 1.5257,
"step": 1020
},
{
"epoch": 0.77,
"learning_rate": 1.2830094668659692e-05,
"loss": 1.5182,
"step": 1030
},
{
"epoch": 0.78,
"learning_rate": 1.2954658694569007e-05,
"loss": 1.5301,
"step": 1040
},
{
"epoch": 0.78,
"learning_rate": 1.3079222720478326e-05,
"loss": 1.5157,
"step": 1050
},
{
"epoch": 0.79,
"learning_rate": 1.3203786746387644e-05,
"loss": 1.543,
"step": 1060
},
{
"epoch": 0.8,
"learning_rate": 1.332835077229696e-05,
"loss": 1.5295,
"step": 1070
},
{
"epoch": 0.81,
"learning_rate": 1.3452914798206278e-05,
"loss": 1.5202,
"step": 1080
},
{
"epoch": 0.81,
"learning_rate": 1.3577478824115597e-05,
"loss": 1.5509,
"step": 1090
},
{
"epoch": 0.82,
"learning_rate": 1.3702042850024913e-05,
"loss": 1.5076,
"step": 1100
},
{
"epoch": 0.83,
"learning_rate": 1.382660687593423e-05,
"loss": 1.5784,
"step": 1110
},
{
"epoch": 0.84,
"learning_rate": 1.3951170901843549e-05,
"loss": 1.488,
"step": 1120
},
{
"epoch": 0.84,
"learning_rate": 1.4075734927752866e-05,
"loss": 1.5451,
"step": 1130
},
{
"epoch": 0.85,
"learning_rate": 1.4200298953662183e-05,
"loss": 1.5296,
"step": 1140
},
{
"epoch": 0.86,
"learning_rate": 1.4324862979571501e-05,
"loss": 1.507,
"step": 1150
},
{
"epoch": 0.87,
"learning_rate": 1.4449427005480818e-05,
"loss": 1.5237,
"step": 1160
},
{
"epoch": 0.87,
"learning_rate": 1.4573991031390136e-05,
"loss": 1.5365,
"step": 1170
},
{
"epoch": 0.88,
"learning_rate": 1.4698555057299452e-05,
"loss": 1.4671,
"step": 1180
},
{
"epoch": 0.89,
"learning_rate": 1.482311908320877e-05,
"loss": 1.5421,
"step": 1190
},
{
"epoch": 0.9,
"learning_rate": 1.4947683109118089e-05,
"loss": 1.5452,
"step": 1200
},
{
"epoch": 0.9,
"learning_rate": 1.5072247135027404e-05,
"loss": 1.5348,
"step": 1210
},
{
"epoch": 0.91,
"learning_rate": 1.5196811160936722e-05,
"loss": 1.5065,
"step": 1220
},
{
"epoch": 0.92,
"learning_rate": 1.532137518684604e-05,
"loss": 1.5206,
"step": 1230
},
{
"epoch": 0.93,
"learning_rate": 1.5445939212755354e-05,
"loss": 1.4605,
"step": 1240
},
{
"epoch": 0.93,
"learning_rate": 1.5570503238664673e-05,
"loss": 1.4862,
"step": 1250
},
{
"epoch": 0.94,
"learning_rate": 1.569506726457399e-05,
"loss": 1.5264,
"step": 1260
},
{
"epoch": 0.95,
"learning_rate": 1.5819631290483307e-05,
"loss": 1.5014,
"step": 1270
},
{
"epoch": 0.96,
"learning_rate": 1.5944195316392625e-05,
"loss": 1.442,
"step": 1280
},
{
"epoch": 0.96,
"learning_rate": 1.6068759342301944e-05,
"loss": 1.5164,
"step": 1290
},
{
"epoch": 0.97,
"learning_rate": 1.619332336821126e-05,
"loss": 1.4757,
"step": 1300
},
{
"epoch": 0.98,
"learning_rate": 1.6317887394120578e-05,
"loss": 1.4818,
"step": 1310
},
{
"epoch": 0.99,
"learning_rate": 1.6442451420029896e-05,
"loss": 1.466,
"step": 1320
},
{
"epoch": 0.99,
"learning_rate": 1.656701544593921e-05,
"loss": 1.4965,
"step": 1330
},
{
"epoch": 1.0,
"eval_accuracy": 0.415620568307159,
"eval_loss": 1.3515620231628418,
"eval_runtime": 71.4809,
"eval_samples_per_second": 266.351,
"eval_steps_per_second": 8.324,
"step": 1338
},
{
"epoch": 1.0,
"learning_rate": 1.669157947184853e-05,
"loss": 1.5293,
"step": 1340
},
{
"epoch": 1.01,
"learning_rate": 1.681614349775785e-05,
"loss": 1.5072,
"step": 1350
},
{
"epoch": 1.02,
"learning_rate": 1.6940707523667167e-05,
"loss": 1.5072,
"step": 1360
},
{
"epoch": 1.02,
"learning_rate": 1.7065271549576482e-05,
"loss": 1.4888,
"step": 1370
},
{
"epoch": 1.03,
"learning_rate": 1.71898355754858e-05,
"loss": 1.5276,
"step": 1380
},
{
"epoch": 1.04,
"learning_rate": 1.731439960139512e-05,
"loss": 1.5533,
"step": 1390
},
{
"epoch": 1.05,
"learning_rate": 1.7438963627304434e-05,
"loss": 1.4956,
"step": 1400
},
{
"epoch": 1.05,
"learning_rate": 1.7563527653213753e-05,
"loss": 1.4759,
"step": 1410
},
{
"epoch": 1.06,
"learning_rate": 1.768809167912307e-05,
"loss": 1.4494,
"step": 1420
},
{
"epoch": 1.07,
"learning_rate": 1.781265570503239e-05,
"loss": 1.4875,
"step": 1430
},
{
"epoch": 1.08,
"learning_rate": 1.7937219730941705e-05,
"loss": 1.4431,
"step": 1440
},
{
"epoch": 1.08,
"learning_rate": 1.8061783756851024e-05,
"loss": 1.5103,
"step": 1450
},
{
"epoch": 1.09,
"learning_rate": 1.8186347782760342e-05,
"loss": 1.5091,
"step": 1460
},
{
"epoch": 1.1,
"learning_rate": 1.8310911808669657e-05,
"loss": 1.5027,
"step": 1470
},
{
"epoch": 1.11,
"learning_rate": 1.8435475834578976e-05,
"loss": 1.523,
"step": 1480
},
{
"epoch": 1.11,
"learning_rate": 1.8560039860488295e-05,
"loss": 1.4731,
"step": 1490
},
{
"epoch": 1.12,
"learning_rate": 1.868460388639761e-05,
"loss": 1.5176,
"step": 1500
},
{
"epoch": 1.13,
"learning_rate": 1.8809167912306928e-05,
"loss": 1.4936,
"step": 1510
},
{
"epoch": 1.14,
"learning_rate": 1.8933731938216247e-05,
"loss": 1.5214,
"step": 1520
},
{
"epoch": 1.14,
"learning_rate": 1.9058295964125562e-05,
"loss": 1.5096,
"step": 1530
},
{
"epoch": 1.15,
"learning_rate": 1.918285999003488e-05,
"loss": 1.4808,
"step": 1540
},
{
"epoch": 1.16,
"learning_rate": 1.9307424015944196e-05,
"loss": 1.5009,
"step": 1550
},
{
"epoch": 1.17,
"learning_rate": 1.9431988041853514e-05,
"loss": 1.5298,
"step": 1560
},
{
"epoch": 1.17,
"learning_rate": 1.9556552067762833e-05,
"loss": 1.5014,
"step": 1570
},
{
"epoch": 1.18,
"learning_rate": 1.9681116093672148e-05,
"loss": 1.5428,
"step": 1580
},
{
"epoch": 1.19,
"learning_rate": 1.9805680119581466e-05,
"loss": 1.4961,
"step": 1590
},
{
"epoch": 1.2,
"learning_rate": 1.9930244145490785e-05,
"loss": 1.4838,
"step": 1600
},
{
"epoch": 1.2,
"learning_rate": 2.00548081714001e-05,
"loss": 1.4841,
"step": 1610
},
{
"epoch": 1.21,
"learning_rate": 2.017937219730942e-05,
"loss": 1.5171,
"step": 1620
},
{
"epoch": 1.22,
"learning_rate": 2.0303936223218737e-05,
"loss": 1.487,
"step": 1630
},
{
"epoch": 1.23,
"learning_rate": 2.0428500249128052e-05,
"loss": 1.5065,
"step": 1640
},
{
"epoch": 1.23,
"learning_rate": 2.055306427503737e-05,
"loss": 1.5012,
"step": 1650
},
{
"epoch": 1.24,
"learning_rate": 2.067762830094669e-05,
"loss": 1.4717,
"step": 1660
},
{
"epoch": 1.25,
"learning_rate": 2.0802192326856005e-05,
"loss": 1.499,
"step": 1670
},
{
"epoch": 1.26,
"learning_rate": 2.0926756352765323e-05,
"loss": 1.5168,
"step": 1680
},
{
"epoch": 1.26,
"learning_rate": 2.1051320378674642e-05,
"loss": 1.4715,
"step": 1690
},
{
"epoch": 1.27,
"learning_rate": 2.1175884404583957e-05,
"loss": 1.5063,
"step": 1700
},
{
"epoch": 1.28,
"learning_rate": 2.1300448430493276e-05,
"loss": 1.4813,
"step": 1710
},
{
"epoch": 1.29,
"learning_rate": 2.142501245640259e-05,
"loss": 1.504,
"step": 1720
},
{
"epoch": 1.29,
"learning_rate": 2.154957648231191e-05,
"loss": 1.4938,
"step": 1730
},
{
"epoch": 1.3,
"learning_rate": 2.1674140508221228e-05,
"loss": 1.4801,
"step": 1740
},
{
"epoch": 1.31,
"learning_rate": 2.1798704534130543e-05,
"loss": 1.4947,
"step": 1750
},
{
"epoch": 1.31,
"learning_rate": 2.192326856003986e-05,
"loss": 1.4785,
"step": 1760
},
{
"epoch": 1.32,
"learning_rate": 2.204783258594918e-05,
"loss": 1.4633,
"step": 1770
},
{
"epoch": 1.33,
"learning_rate": 2.2172396611858495e-05,
"loss": 1.479,
"step": 1780
},
{
"epoch": 1.34,
"learning_rate": 2.2296960637767814e-05,
"loss": 1.4846,
"step": 1790
},
{
"epoch": 1.34,
"learning_rate": 2.2421524663677132e-05,
"loss": 1.4492,
"step": 1800
},
{
"epoch": 1.35,
"learning_rate": 2.2546088689586447e-05,
"loss": 1.4542,
"step": 1810
},
{
"epoch": 1.36,
"learning_rate": 2.2670652715495766e-05,
"loss": 1.5269,
"step": 1820
},
{
"epoch": 1.37,
"learning_rate": 2.2795216741405085e-05,
"loss": 1.4731,
"step": 1830
},
{
"epoch": 1.37,
"learning_rate": 2.29197807673144e-05,
"loss": 1.4985,
"step": 1840
},
{
"epoch": 1.38,
"learning_rate": 2.3044344793223718e-05,
"loss": 1.4946,
"step": 1850
},
{
"epoch": 1.39,
"learning_rate": 2.3168908819133037e-05,
"loss": 1.4472,
"step": 1860
},
{
"epoch": 1.4,
"learning_rate": 2.3293472845042352e-05,
"loss": 1.4569,
"step": 1870
},
{
"epoch": 1.4,
"learning_rate": 2.341803687095167e-05,
"loss": 1.4809,
"step": 1880
},
{
"epoch": 1.41,
"learning_rate": 2.354260089686099e-05,
"loss": 1.5144,
"step": 1890
},
{
"epoch": 1.42,
"learning_rate": 2.3667164922770304e-05,
"loss": 1.4954,
"step": 1900
},
{
"epoch": 1.43,
"learning_rate": 2.3791728948679623e-05,
"loss": 1.5037,
"step": 1910
},
{
"epoch": 1.43,
"learning_rate": 2.3916292974588938e-05,
"loss": 1.4545,
"step": 1920
},
{
"epoch": 1.44,
"learning_rate": 2.4040857000498256e-05,
"loss": 1.471,
"step": 1930
},
{
"epoch": 1.45,
"learning_rate": 2.4165421026407575e-05,
"loss": 1.4995,
"step": 1940
},
{
"epoch": 1.46,
"learning_rate": 2.428998505231689e-05,
"loss": 1.4842,
"step": 1950
},
{
"epoch": 1.46,
"learning_rate": 2.441454907822621e-05,
"loss": 1.4264,
"step": 1960
},
{
"epoch": 1.47,
"learning_rate": 2.4539113104135527e-05,
"loss": 1.5007,
"step": 1970
},
{
"epoch": 1.48,
"learning_rate": 2.4663677130044842e-05,
"loss": 1.5177,
"step": 1980
},
{
"epoch": 1.49,
"learning_rate": 2.478824115595416e-05,
"loss": 1.4946,
"step": 1990
},
{
"epoch": 1.49,
"learning_rate": 2.491280518186348e-05,
"loss": 1.4646,
"step": 2000
},
{
"epoch": 1.5,
"learning_rate": 2.5037369207772798e-05,
"loss": 1.4593,
"step": 2010
},
{
"epoch": 1.51,
"learning_rate": 2.516193323368211e-05,
"loss": 1.484,
"step": 2020
},
{
"epoch": 1.52,
"learning_rate": 2.528649725959143e-05,
"loss": 1.4431,
"step": 2030
},
{
"epoch": 1.52,
"learning_rate": 2.5411061285500747e-05,
"loss": 1.4944,
"step": 2040
},
{
"epoch": 1.53,
"learning_rate": 2.5535625311410066e-05,
"loss": 1.4636,
"step": 2050
},
{
"epoch": 1.54,
"learning_rate": 2.5660189337319384e-05,
"loss": 1.4593,
"step": 2060
},
{
"epoch": 1.55,
"learning_rate": 2.5784753363228703e-05,
"loss": 1.4568,
"step": 2070
},
{
"epoch": 1.55,
"learning_rate": 2.5909317389138014e-05,
"loss": 1.4707,
"step": 2080
},
{
"epoch": 1.56,
"learning_rate": 2.6033881415047333e-05,
"loss": 1.4637,
"step": 2090
},
{
"epoch": 1.57,
"learning_rate": 2.615844544095665e-05,
"loss": 1.4887,
"step": 2100
},
{
"epoch": 1.58,
"learning_rate": 2.628300946686597e-05,
"loss": 1.4417,
"step": 2110
},
{
"epoch": 1.58,
"learning_rate": 2.640757349277529e-05,
"loss": 1.4382,
"step": 2120
},
{
"epoch": 1.59,
"learning_rate": 2.6532137518684607e-05,
"loss": 1.4814,
"step": 2130
},
{
"epoch": 1.6,
"learning_rate": 2.665670154459392e-05,
"loss": 1.5115,
"step": 2140
},
{
"epoch": 1.61,
"learning_rate": 2.6781265570503237e-05,
"loss": 1.4776,
"step": 2150
},
{
"epoch": 1.61,
"learning_rate": 2.6905829596412556e-05,
"loss": 1.4685,
"step": 2160
},
{
"epoch": 1.62,
"learning_rate": 2.7030393622321875e-05,
"loss": 1.474,
"step": 2170
},
{
"epoch": 1.63,
"learning_rate": 2.7154957648231193e-05,
"loss": 1.483,
"step": 2180
},
{
"epoch": 1.64,
"learning_rate": 2.7279521674140508e-05,
"loss": 1.4975,
"step": 2190
},
{
"epoch": 1.64,
"learning_rate": 2.7404085700049827e-05,
"loss": 1.4521,
"step": 2200
},
{
"epoch": 1.65,
"learning_rate": 2.7528649725959142e-05,
"loss": 1.4596,
"step": 2210
},
{
"epoch": 1.66,
"learning_rate": 2.765321375186846e-05,
"loss": 1.4638,
"step": 2220
},
{
"epoch": 1.67,
"learning_rate": 2.777777777777778e-05,
"loss": 1.4505,
"step": 2230
},
{
"epoch": 1.67,
"learning_rate": 2.7902341803687098e-05,
"loss": 1.4482,
"step": 2240
},
{
"epoch": 1.68,
"learning_rate": 2.8026905829596413e-05,
"loss": 1.4269,
"step": 2250
},
{
"epoch": 1.69,
"learning_rate": 2.815146985550573e-05,
"loss": 1.5059,
"step": 2260
},
{
"epoch": 1.7,
"learning_rate": 2.827603388141505e-05,
"loss": 1.4493,
"step": 2270
},
{
"epoch": 1.7,
"learning_rate": 2.8400597907324365e-05,
"loss": 1.4647,
"step": 2280
},
{
"epoch": 1.71,
"learning_rate": 2.8525161933233684e-05,
"loss": 1.465,
"step": 2290
},
{
"epoch": 1.72,
"learning_rate": 2.8649725959143002e-05,
"loss": 1.4955,
"step": 2300
},
{
"epoch": 1.73,
"learning_rate": 2.8774289985052317e-05,
"loss": 1.44,
"step": 2310
},
{
"epoch": 1.73,
"learning_rate": 2.8898854010961636e-05,
"loss": 1.472,
"step": 2320
},
{
"epoch": 1.74,
"learning_rate": 2.9023418036870954e-05,
"loss": 1.4538,
"step": 2330
},
{
"epoch": 1.75,
"learning_rate": 2.9147982062780273e-05,
"loss": 1.4596,
"step": 2340
},
{
"epoch": 1.76,
"learning_rate": 2.9272546088689588e-05,
"loss": 1.4733,
"step": 2350
},
{
"epoch": 1.76,
"learning_rate": 2.9397110114598903e-05,
"loss": 1.4758,
"step": 2360
},
{
"epoch": 1.77,
"learning_rate": 2.9521674140508222e-05,
"loss": 1.4339,
"step": 2370
},
{
"epoch": 1.78,
"learning_rate": 2.964623816641754e-05,
"loss": 1.4666,
"step": 2380
},
{
"epoch": 1.79,
"learning_rate": 2.977080219232686e-05,
"loss": 1.529,
"step": 2390
},
{
"epoch": 1.79,
"learning_rate": 2.9895366218236178e-05,
"loss": 1.4508,
"step": 2400
},
{
"epoch": 1.8,
"learning_rate": 3.0019930244145496e-05,
"loss": 1.4848,
"step": 2410
},
{
"epoch": 1.81,
"learning_rate": 3.0144494270054808e-05,
"loss": 1.4969,
"step": 2420
},
{
"epoch": 1.82,
"learning_rate": 3.0269058295964126e-05,
"loss": 1.4432,
"step": 2430
},
{
"epoch": 1.82,
"learning_rate": 3.0393622321873445e-05,
"loss": 1.4578,
"step": 2440
},
{
"epoch": 1.83,
"learning_rate": 3.051818634778276e-05,
"loss": 1.4663,
"step": 2450
},
{
"epoch": 1.84,
"learning_rate": 3.064275037369208e-05,
"loss": 1.4646,
"step": 2460
},
{
"epoch": 1.85,
"learning_rate": 3.07673143996014e-05,
"loss": 1.467,
"step": 2470
},
{
"epoch": 1.85,
"learning_rate": 3.089187842551071e-05,
"loss": 1.4983,
"step": 2480
},
{
"epoch": 1.86,
"learning_rate": 3.101644245142003e-05,
"loss": 1.4441,
"step": 2490
},
{
"epoch": 1.87,
"learning_rate": 3.1141006477329346e-05,
"loss": 1.5113,
"step": 2500
},
{
"epoch": 1.88,
"learning_rate": 3.1265570503238665e-05,
"loss": 1.462,
"step": 2510
},
{
"epoch": 1.88,
"learning_rate": 3.139013452914798e-05,
"loss": 1.4877,
"step": 2520
},
{
"epoch": 1.89,
"learning_rate": 3.15146985550573e-05,
"loss": 1.4251,
"step": 2530
},
{
"epoch": 1.9,
"learning_rate": 3.1639262580966613e-05,
"loss": 1.4748,
"step": 2540
},
{
"epoch": 1.91,
"learning_rate": 3.176382660687593e-05,
"loss": 1.4915,
"step": 2550
},
{
"epoch": 1.91,
"learning_rate": 3.188839063278525e-05,
"loss": 1.4827,
"step": 2560
},
{
"epoch": 1.92,
"learning_rate": 3.201295465869457e-05,
"loss": 1.4708,
"step": 2570
},
{
"epoch": 1.93,
"learning_rate": 3.213751868460389e-05,
"loss": 1.4687,
"step": 2580
},
{
"epoch": 1.94,
"learning_rate": 3.2262082710513206e-05,
"loss": 1.5089,
"step": 2590
},
{
"epoch": 1.94,
"learning_rate": 3.238664673642252e-05,
"loss": 1.4702,
"step": 2600
},
{
"epoch": 1.95,
"learning_rate": 3.2511210762331837e-05,
"loss": 1.4472,
"step": 2610
},
{
"epoch": 1.96,
"learning_rate": 3.2635774788241155e-05,
"loss": 1.4507,
"step": 2620
},
{
"epoch": 1.96,
"learning_rate": 3.2760338814150474e-05,
"loss": 1.4484,
"step": 2630
},
{
"epoch": 1.97,
"learning_rate": 3.288490284005979e-05,
"loss": 1.4383,
"step": 2640
},
{
"epoch": 1.98,
"learning_rate": 3.300946686596911e-05,
"loss": 1.469,
"step": 2650
},
{
"epoch": 1.99,
"learning_rate": 3.313403089187842e-05,
"loss": 1.4704,
"step": 2660
},
{
"epoch": 1.99,
"learning_rate": 3.325859491778774e-05,
"loss": 1.4486,
"step": 2670
},
{
"epoch": 2.0,
"eval_accuracy": 0.49377593360995853,
"eval_loss": 1.1784321069717407,
"eval_runtime": 75.0361,
"eval_samples_per_second": 253.731,
"eval_steps_per_second": 7.93,
"step": 2677
},
{
"epoch": 2.0,
"learning_rate": 3.338315894369706e-05,
"loss": 1.4867,
"step": 2680
},
{
"epoch": 2.01,
"learning_rate": 3.350772296960638e-05,
"loss": 1.4697,
"step": 2690
},
{
"epoch": 2.02,
"learning_rate": 3.36322869955157e-05,
"loss": 1.4654,
"step": 2700
},
{
"epoch": 2.02,
"learning_rate": 3.3756851021425015e-05,
"loss": 1.4756,
"step": 2710
},
{
"epoch": 2.03,
"learning_rate": 3.3881415047334334e-05,
"loss": 1.4649,
"step": 2720
},
{
"epoch": 2.04,
"learning_rate": 3.4005979073243646e-05,
"loss": 1.4663,
"step": 2730
},
{
"epoch": 2.05,
"learning_rate": 3.4130543099152964e-05,
"loss": 1.4705,
"step": 2740
},
{
"epoch": 2.05,
"learning_rate": 3.425510712506228e-05,
"loss": 1.4567,
"step": 2750
},
{
"epoch": 2.06,
"learning_rate": 3.43796711509716e-05,
"loss": 1.461,
"step": 2760
},
{
"epoch": 2.07,
"learning_rate": 3.450423517688092e-05,
"loss": 1.4509,
"step": 2770
},
{
"epoch": 2.08,
"learning_rate": 3.462879920279024e-05,
"loss": 1.4471,
"step": 2780
},
{
"epoch": 2.08,
"learning_rate": 3.475336322869956e-05,
"loss": 1.4806,
"step": 2790
},
{
"epoch": 2.09,
"learning_rate": 3.487792725460887e-05,
"loss": 1.4294,
"step": 2800
},
{
"epoch": 2.1,
"learning_rate": 3.500249128051819e-05,
"loss": 1.4167,
"step": 2810
},
{
"epoch": 2.11,
"learning_rate": 3.5127055306427506e-05,
"loss": 1.4318,
"step": 2820
},
{
"epoch": 2.11,
"learning_rate": 3.5251619332336824e-05,
"loss": 1.4609,
"step": 2830
},
{
"epoch": 2.12,
"learning_rate": 3.537618335824614e-05,
"loss": 1.4426,
"step": 2840
},
{
"epoch": 2.13,
"learning_rate": 3.550074738415546e-05,
"loss": 1.4347,
"step": 2850
},
{
"epoch": 2.14,
"learning_rate": 3.562531141006478e-05,
"loss": 1.4859,
"step": 2860
},
{
"epoch": 2.14,
"learning_rate": 3.574987543597409e-05,
"loss": 1.4771,
"step": 2870
},
{
"epoch": 2.15,
"learning_rate": 3.587443946188341e-05,
"loss": 1.4554,
"step": 2880
},
{
"epoch": 2.16,
"learning_rate": 3.599900348779273e-05,
"loss": 1.4731,
"step": 2890
},
{
"epoch": 2.17,
"learning_rate": 3.612356751370205e-05,
"loss": 1.4591,
"step": 2900
},
{
"epoch": 2.17,
"learning_rate": 3.6248131539611366e-05,
"loss": 1.4454,
"step": 2910
},
{
"epoch": 2.18,
"learning_rate": 3.6372695565520685e-05,
"loss": 1.4672,
"step": 2920
},
{
"epoch": 2.19,
"learning_rate": 3.6497259591429996e-05,
"loss": 1.4507,
"step": 2930
},
{
"epoch": 2.2,
"learning_rate": 3.6621823617339315e-05,
"loss": 1.4656,
"step": 2940
},
{
"epoch": 2.2,
"learning_rate": 3.674638764324863e-05,
"loss": 1.4804,
"step": 2950
},
{
"epoch": 2.21,
"learning_rate": 3.687095166915795e-05,
"loss": 1.4576,
"step": 2960
},
{
"epoch": 2.22,
"learning_rate": 3.699551569506727e-05,
"loss": 1.4403,
"step": 2970
},
{
"epoch": 2.23,
"learning_rate": 3.712007972097659e-05,
"loss": 1.4201,
"step": 2980
},
{
"epoch": 2.23,
"learning_rate": 3.72446437468859e-05,
"loss": 1.4382,
"step": 2990
},
{
"epoch": 2.24,
"learning_rate": 3.736920777279522e-05,
"loss": 1.4223,
"step": 3000
},
{
"epoch": 2.25,
"learning_rate": 3.749377179870454e-05,
"loss": 1.4844,
"step": 3010
},
{
"epoch": 2.26,
"learning_rate": 3.7618335824613856e-05,
"loss": 1.4411,
"step": 3020
},
{
"epoch": 2.26,
"learning_rate": 3.7742899850523175e-05,
"loss": 1.4481,
"step": 3030
},
{
"epoch": 2.27,
"learning_rate": 3.7867463876432494e-05,
"loss": 1.4669,
"step": 3040
},
{
"epoch": 2.28,
"learning_rate": 3.7992027902341805e-05,
"loss": 1.4271,
"step": 3050
},
{
"epoch": 2.29,
"learning_rate": 3.8116591928251124e-05,
"loss": 1.405,
"step": 3060
},
{
"epoch": 2.29,
"learning_rate": 3.824115595416044e-05,
"loss": 1.4487,
"step": 3070
},
{
"epoch": 2.3,
"learning_rate": 3.836571998006976e-05,
"loss": 1.5204,
"step": 3080
},
{
"epoch": 2.31,
"learning_rate": 3.849028400597908e-05,
"loss": 1.4575,
"step": 3090
},
{
"epoch": 2.32,
"learning_rate": 3.861484803188839e-05,
"loss": 1.4592,
"step": 3100
},
{
"epoch": 2.32,
"learning_rate": 3.873941205779771e-05,
"loss": 1.4397,
"step": 3110
},
{
"epoch": 2.33,
"learning_rate": 3.886397608370703e-05,
"loss": 1.4857,
"step": 3120
},
{
"epoch": 2.34,
"learning_rate": 3.898854010961635e-05,
"loss": 1.441,
"step": 3130
},
{
"epoch": 2.35,
"learning_rate": 3.9113104135525665e-05,
"loss": 1.449,
"step": 3140
},
{
"epoch": 2.35,
"learning_rate": 3.9237668161434984e-05,
"loss": 1.4348,
"step": 3150
},
{
"epoch": 2.36,
"learning_rate": 3.9362232187344296e-05,
"loss": 1.4185,
"step": 3160
},
{
"epoch": 2.37,
"learning_rate": 3.9486796213253614e-05,
"loss": 1.4683,
"step": 3170
},
{
"epoch": 2.38,
"learning_rate": 3.961136023916293e-05,
"loss": 1.4478,
"step": 3180
},
{
"epoch": 2.38,
"learning_rate": 3.973592426507225e-05,
"loss": 1.4481,
"step": 3190
},
{
"epoch": 2.39,
"learning_rate": 3.986048829098157e-05,
"loss": 1.412,
"step": 3200
},
{
"epoch": 2.4,
"learning_rate": 3.998505231689089e-05,
"loss": 1.4763,
"step": 3210
},
{
"epoch": 2.41,
"learning_rate": 4.01096163428002e-05,
"loss": 1.4394,
"step": 3220
},
{
"epoch": 2.41,
"learning_rate": 4.023418036870952e-05,
"loss": 1.4199,
"step": 3230
},
{
"epoch": 2.42,
"learning_rate": 4.035874439461884e-05,
"loss": 1.4607,
"step": 3240
},
{
"epoch": 2.43,
"learning_rate": 4.0483308420528156e-05,
"loss": 1.4517,
"step": 3250
},
{
"epoch": 2.44,
"learning_rate": 4.0607872446437475e-05,
"loss": 1.4155,
"step": 3260
},
{
"epoch": 2.44,
"learning_rate": 4.0732436472346786e-05,
"loss": 1.4777,
"step": 3270
},
{
"epoch": 2.45,
"learning_rate": 4.0857000498256105e-05,
"loss": 1.4679,
"step": 3280
},
{
"epoch": 2.46,
"learning_rate": 4.0981564524165423e-05,
"loss": 1.4283,
"step": 3290
},
{
"epoch": 2.47,
"learning_rate": 4.110612855007474e-05,
"loss": 1.4297,
"step": 3300
},
{
"epoch": 2.47,
"learning_rate": 4.123069257598406e-05,
"loss": 1.4651,
"step": 3310
},
{
"epoch": 2.48,
"learning_rate": 4.135525660189338e-05,
"loss": 1.4665,
"step": 3320
},
{
"epoch": 2.49,
"learning_rate": 4.147982062780269e-05,
"loss": 1.4798,
"step": 3330
},
{
"epoch": 2.5,
"learning_rate": 4.160438465371201e-05,
"loss": 1.48,
"step": 3340
},
{
"epoch": 2.5,
"learning_rate": 4.172894867962133e-05,
"loss": 1.4317,
"step": 3350
},
{
"epoch": 2.51,
"learning_rate": 4.1853512705530646e-05,
"loss": 1.4519,
"step": 3360
},
{
"epoch": 2.52,
"learning_rate": 4.1978076731439965e-05,
"loss": 1.4051,
"step": 3370
},
{
"epoch": 2.53,
"learning_rate": 4.2102640757349284e-05,
"loss": 1.4801,
"step": 3380
},
{
"epoch": 2.53,
"learning_rate": 4.2227204783258595e-05,
"loss": 1.4254,
"step": 3390
},
{
"epoch": 2.54,
"learning_rate": 4.2351768809167914e-05,
"loss": 1.4581,
"step": 3400
},
{
"epoch": 2.55,
"learning_rate": 4.247633283507723e-05,
"loss": 1.4592,
"step": 3410
},
{
"epoch": 2.56,
"learning_rate": 4.260089686098655e-05,
"loss": 1.4342,
"step": 3420
},
{
"epoch": 2.56,
"learning_rate": 4.272546088689587e-05,
"loss": 1.4155,
"step": 3430
},
{
"epoch": 2.57,
"learning_rate": 4.285002491280518e-05,
"loss": 1.4197,
"step": 3440
},
{
"epoch": 2.58,
"learning_rate": 4.29745889387145e-05,
"loss": 1.4437,
"step": 3450
},
{
"epoch": 2.58,
"learning_rate": 4.309915296462382e-05,
"loss": 1.456,
"step": 3460
},
{
"epoch": 2.59,
"learning_rate": 4.322371699053314e-05,
"loss": 1.4594,
"step": 3470
},
{
"epoch": 2.6,
"learning_rate": 4.3348281016442456e-05,
"loss": 1.4608,
"step": 3480
},
{
"epoch": 2.61,
"learning_rate": 4.3472845042351774e-05,
"loss": 1.4719,
"step": 3490
},
{
"epoch": 2.61,
"learning_rate": 4.3597409068261086e-05,
"loss": 1.4815,
"step": 3500
},
{
"epoch": 2.62,
"learning_rate": 4.3721973094170404e-05,
"loss": 1.437,
"step": 3510
},
{
"epoch": 2.63,
"learning_rate": 4.384653712007972e-05,
"loss": 1.4433,
"step": 3520
},
{
"epoch": 2.64,
"learning_rate": 4.397110114598904e-05,
"loss": 1.4698,
"step": 3530
},
{
"epoch": 2.64,
"learning_rate": 4.409566517189836e-05,
"loss": 1.4623,
"step": 3540
},
{
"epoch": 2.65,
"learning_rate": 4.422022919780768e-05,
"loss": 1.4473,
"step": 3550
},
{
"epoch": 2.66,
"learning_rate": 4.434479322371699e-05,
"loss": 1.4834,
"step": 3560
},
{
"epoch": 2.67,
"learning_rate": 4.446935724962631e-05,
"loss": 1.4281,
"step": 3570
},
{
"epoch": 2.67,
"learning_rate": 4.459392127553563e-05,
"loss": 1.4453,
"step": 3580
},
{
"epoch": 2.68,
"learning_rate": 4.4718485301444946e-05,
"loss": 1.4601,
"step": 3590
},
{
"epoch": 2.69,
"learning_rate": 4.4843049327354265e-05,
"loss": 1.4607,
"step": 3600
},
{
"epoch": 2.7,
"learning_rate": 4.496761335326358e-05,
"loss": 1.4767,
"step": 3610
},
{
"epoch": 2.7,
"learning_rate": 4.5092177379172895e-05,
"loss": 1.457,
"step": 3620
},
{
"epoch": 2.71,
"learning_rate": 4.5216741405082213e-05,
"loss": 1.438,
"step": 3630
},
{
"epoch": 2.72,
"learning_rate": 4.534130543099153e-05,
"loss": 1.4902,
"step": 3640
},
{
"epoch": 2.73,
"learning_rate": 4.546586945690085e-05,
"loss": 1.4313,
"step": 3650
},
{
"epoch": 2.73,
"learning_rate": 4.559043348281017e-05,
"loss": 1.4605,
"step": 3660
},
{
"epoch": 2.74,
"learning_rate": 4.571499750871948e-05,
"loss": 1.443,
"step": 3670
},
{
"epoch": 2.75,
"learning_rate": 4.58395615346288e-05,
"loss": 1.4121,
"step": 3680
},
{
"epoch": 2.76,
"learning_rate": 4.596412556053812e-05,
"loss": 1.4655,
"step": 3690
},
{
"epoch": 2.76,
"learning_rate": 4.6088689586447437e-05,
"loss": 1.4101,
"step": 3700
},
{
"epoch": 2.77,
"learning_rate": 4.6213253612356755e-05,
"loss": 1.4256,
"step": 3710
},
{
"epoch": 2.78,
"learning_rate": 4.6337817638266074e-05,
"loss": 1.4297,
"step": 3720
},
{
"epoch": 2.79,
"learning_rate": 4.6462381664175385e-05,
"loss": 1.4185,
"step": 3730
},
{
"epoch": 2.79,
"learning_rate": 4.6586945690084704e-05,
"loss": 1.436,
"step": 3740
},
{
"epoch": 2.8,
"learning_rate": 4.671150971599402e-05,
"loss": 1.444,
"step": 3750
},
{
"epoch": 2.81,
"learning_rate": 4.683607374190334e-05,
"loss": 1.4306,
"step": 3760
},
{
"epoch": 2.82,
"learning_rate": 4.696063776781266e-05,
"loss": 1.4601,
"step": 3770
},
{
"epoch": 2.82,
"learning_rate": 4.708520179372198e-05,
"loss": 1.4125,
"step": 3780
},
{
"epoch": 2.83,
"learning_rate": 4.720976581963129e-05,
"loss": 1.4321,
"step": 3790
},
{
"epoch": 2.84,
"learning_rate": 4.733432984554061e-05,
"loss": 1.4493,
"step": 3800
},
{
"epoch": 2.85,
"learning_rate": 4.745889387144993e-05,
"loss": 1.4558,
"step": 3810
},
{
"epoch": 2.85,
"learning_rate": 4.7583457897359246e-05,
"loss": 1.4495,
"step": 3820
},
{
"epoch": 2.86,
"learning_rate": 4.7708021923268564e-05,
"loss": 1.4771,
"step": 3830
},
{
"epoch": 2.87,
"learning_rate": 4.7832585949177876e-05,
"loss": 1.4699,
"step": 3840
},
{
"epoch": 2.88,
"learning_rate": 4.7957149975087194e-05,
"loss": 1.4411,
"step": 3850
},
{
"epoch": 2.88,
"learning_rate": 4.808171400099651e-05,
"loss": 1.462,
"step": 3860
},
{
"epoch": 2.89,
"learning_rate": 4.820627802690583e-05,
"loss": 1.43,
"step": 3870
},
{
"epoch": 2.9,
"learning_rate": 4.833084205281515e-05,
"loss": 1.4301,
"step": 3880
},
{
"epoch": 2.91,
"learning_rate": 4.845540607872447e-05,
"loss": 1.4786,
"step": 3890
},
{
"epoch": 2.91,
"learning_rate": 4.857997010463378e-05,
"loss": 1.4121,
"step": 3900
},
{
"epoch": 2.92,
"learning_rate": 4.87045341305431e-05,
"loss": 1.4373,
"step": 3910
},
{
"epoch": 2.93,
"learning_rate": 4.882909815645242e-05,
"loss": 1.4682,
"step": 3920
},
{
"epoch": 2.94,
"learning_rate": 4.8953662182361736e-05,
"loss": 1.4753,
"step": 3930
},
{
"epoch": 2.94,
"learning_rate": 4.9078226208271055e-05,
"loss": 1.4293,
"step": 3940
},
{
"epoch": 2.95,
"learning_rate": 4.920279023418037e-05,
"loss": 1.4134,
"step": 3950
},
{
"epoch": 2.96,
"learning_rate": 4.9327354260089685e-05,
"loss": 1.475,
"step": 3960
},
{
"epoch": 2.97,
"learning_rate": 4.9451918285999003e-05,
"loss": 1.4714,
"step": 3970
},
{
"epoch": 2.97,
"learning_rate": 4.957648231190832e-05,
"loss": 1.4427,
"step": 3980
},
{
"epoch": 2.98,
"learning_rate": 4.970104633781764e-05,
"loss": 1.459,
"step": 3990
},
{
"epoch": 2.99,
"learning_rate": 4.982561036372696e-05,
"loss": 1.4328,
"step": 4000
},
{
"epoch": 3.0,
"learning_rate": 4.995017438963627e-05,
"loss": 1.4384,
"step": 4010
},
{
"epoch": 3.0,
"eval_accuracy": 0.5223488628604444,
"eval_loss": 1.1049836874008179,
"eval_runtime": 71.5669,
"eval_samples_per_second": 266.031,
"eval_steps_per_second": 8.314,
"step": 4015
},
{
"epoch": 3.0,
"learning_rate": 4.999169573160605e-05,
"loss": 1.4314,
"step": 4020
},
{
"epoch": 3.01,
"learning_rate": 4.9977855284282795e-05,
"loss": 1.4481,
"step": 4030
},
{
"epoch": 3.02,
"learning_rate": 4.996401483695953e-05,
"loss": 1.4389,
"step": 4040
},
{
"epoch": 3.03,
"learning_rate": 4.995017438963627e-05,
"loss": 1.4285,
"step": 4050
},
{
"epoch": 3.03,
"learning_rate": 4.9936333942313016e-05,
"loss": 1.4432,
"step": 4060
},
{
"epoch": 3.04,
"learning_rate": 4.992249349498976e-05,
"loss": 1.4337,
"step": 4070
},
{
"epoch": 3.05,
"learning_rate": 4.9908653047666505e-05,
"loss": 1.4316,
"step": 4080
},
{
"epoch": 3.06,
"learning_rate": 4.989481260034324e-05,
"loss": 1.4227,
"step": 4090
},
{
"epoch": 3.06,
"learning_rate": 4.988097215301999e-05,
"loss": 1.463,
"step": 4100
},
{
"epoch": 3.07,
"learning_rate": 4.986713170569673e-05,
"loss": 1.4413,
"step": 4110
},
{
"epoch": 3.08,
"learning_rate": 4.985329125837348e-05,
"loss": 1.4387,
"step": 4120
},
{
"epoch": 3.09,
"learning_rate": 4.9839450811050214e-05,
"loss": 1.4093,
"step": 4130
},
{
"epoch": 3.09,
"learning_rate": 4.982561036372696e-05,
"loss": 1.4643,
"step": 4140
},
{
"epoch": 3.1,
"learning_rate": 4.98117699164037e-05,
"loss": 1.4502,
"step": 4150
},
{
"epoch": 3.11,
"learning_rate": 4.979792946908045e-05,
"loss": 1.429,
"step": 4160
},
{
"epoch": 3.12,
"learning_rate": 4.9784089021757186e-05,
"loss": 1.4507,
"step": 4170
},
{
"epoch": 3.12,
"learning_rate": 4.977024857443393e-05,
"loss": 1.4472,
"step": 4180
},
{
"epoch": 3.13,
"learning_rate": 4.975640812711067e-05,
"loss": 1.4724,
"step": 4190
},
{
"epoch": 3.14,
"learning_rate": 4.9742567679787413e-05,
"loss": 1.4394,
"step": 4200
},
{
"epoch": 3.15,
"learning_rate": 4.972872723246416e-05,
"loss": 1.4227,
"step": 4210
},
{
"epoch": 3.15,
"learning_rate": 4.9714886785140896e-05,
"loss": 1.467,
"step": 4220
},
{
"epoch": 3.16,
"learning_rate": 4.970104633781764e-05,
"loss": 1.4317,
"step": 4230
},
{
"epoch": 3.17,
"learning_rate": 4.968720589049438e-05,
"loss": 1.4344,
"step": 4240
},
{
"epoch": 3.18,
"learning_rate": 4.967336544317113e-05,
"loss": 1.4634,
"step": 4250
},
{
"epoch": 3.18,
"learning_rate": 4.965952499584787e-05,
"loss": 1.465,
"step": 4260
},
{
"epoch": 3.19,
"learning_rate": 4.964568454852461e-05,
"loss": 1.4404,
"step": 4270
},
{
"epoch": 3.2,
"learning_rate": 4.963184410120135e-05,
"loss": 1.4193,
"step": 4280
},
{
"epoch": 3.21,
"learning_rate": 4.9618003653878095e-05,
"loss": 1.4231,
"step": 4290
},
{
"epoch": 3.21,
"learning_rate": 4.960416320655484e-05,
"loss": 1.4226,
"step": 4300
},
{
"epoch": 3.22,
"learning_rate": 4.9590322759231584e-05,
"loss": 1.4259,
"step": 4310
},
{
"epoch": 3.23,
"learning_rate": 4.957648231190832e-05,
"loss": 1.4121,
"step": 4320
},
{
"epoch": 3.23,
"learning_rate": 4.956264186458506e-05,
"loss": 1.4348,
"step": 4330
},
{
"epoch": 3.24,
"learning_rate": 4.954880141726181e-05,
"loss": 1.4533,
"step": 4340
},
{
"epoch": 3.25,
"learning_rate": 4.953496096993855e-05,
"loss": 1.4433,
"step": 4350
},
{
"epoch": 3.26,
"learning_rate": 4.9521120522615294e-05,
"loss": 1.3939,
"step": 4360
},
{
"epoch": 3.26,
"learning_rate": 4.950728007529203e-05,
"loss": 1.4802,
"step": 4370
},
{
"epoch": 3.27,
"learning_rate": 4.9493439627968776e-05,
"loss": 1.4619,
"step": 4380
},
{
"epoch": 3.28,
"learning_rate": 4.947959918064552e-05,
"loss": 1.4235,
"step": 4390
},
{
"epoch": 3.29,
"learning_rate": 4.9465758733322266e-05,
"loss": 1.4282,
"step": 4400
},
{
"epoch": 3.29,
"learning_rate": 4.9451918285999003e-05,
"loss": 1.4489,
"step": 4410
},
{
"epoch": 3.3,
"learning_rate": 4.943807783867575e-05,
"loss": 1.3897,
"step": 4420
},
{
"epoch": 3.31,
"learning_rate": 4.942423739135249e-05,
"loss": 1.453,
"step": 4430
},
{
"epoch": 3.32,
"learning_rate": 4.941039694402924e-05,
"loss": 1.4129,
"step": 4440
},
{
"epoch": 3.32,
"learning_rate": 4.9396556496705975e-05,
"loss": 1.4518,
"step": 4450
},
{
"epoch": 3.33,
"learning_rate": 4.938271604938271e-05,
"loss": 1.4226,
"step": 4460
},
{
"epoch": 3.34,
"learning_rate": 4.936887560205946e-05,
"loss": 1.446,
"step": 4470
},
{
"epoch": 3.35,
"learning_rate": 4.93550351547362e-05,
"loss": 1.4333,
"step": 4480
},
{
"epoch": 3.35,
"learning_rate": 4.934119470741295e-05,
"loss": 1.4006,
"step": 4490
},
{
"epoch": 3.36,
"learning_rate": 4.9327354260089685e-05,
"loss": 1.4367,
"step": 4500
},
{
"epoch": 3.37,
"learning_rate": 4.931351381276643e-05,
"loss": 1.4393,
"step": 4510
},
{
"epoch": 3.38,
"learning_rate": 4.9299673365443174e-05,
"loss": 1.4264,
"step": 4520
},
{
"epoch": 3.38,
"learning_rate": 4.928583291811992e-05,
"loss": 1.4482,
"step": 4530
},
{
"epoch": 3.39,
"learning_rate": 4.927199247079666e-05,
"loss": 1.4609,
"step": 4540
},
{
"epoch": 3.4,
"learning_rate": 4.92581520234734e-05,
"loss": 1.4062,
"step": 4550
},
{
"epoch": 3.41,
"learning_rate": 4.9244311576150146e-05,
"loss": 1.4407,
"step": 4560
},
{
"epoch": 3.41,
"learning_rate": 4.923047112882689e-05,
"loss": 1.4126,
"step": 4570
},
{
"epoch": 3.42,
"learning_rate": 4.921663068150363e-05,
"loss": 1.4211,
"step": 4580
},
{
"epoch": 3.43,
"learning_rate": 4.920279023418037e-05,
"loss": 1.4147,
"step": 4590
},
{
"epoch": 3.44,
"learning_rate": 4.918894978685711e-05,
"loss": 1.4696,
"step": 4600
},
{
"epoch": 3.44,
"learning_rate": 4.9175109339533856e-05,
"loss": 1.423,
"step": 4610
},
{
"epoch": 3.45,
"learning_rate": 4.91612688922106e-05,
"loss": 1.4147,
"step": 4620
},
{
"epoch": 3.46,
"learning_rate": 4.914742844488734e-05,
"loss": 1.3997,
"step": 4630
},
{
"epoch": 3.47,
"learning_rate": 4.913358799756408e-05,
"loss": 1.4233,
"step": 4640
},
{
"epoch": 3.47,
"learning_rate": 4.911974755024083e-05,
"loss": 1.4281,
"step": 4650
},
{
"epoch": 3.48,
"learning_rate": 4.910590710291757e-05,
"loss": 1.4351,
"step": 4660
},
{
"epoch": 3.49,
"learning_rate": 4.909206665559431e-05,
"loss": 1.3996,
"step": 4670
},
{
"epoch": 3.5,
"learning_rate": 4.9078226208271055e-05,
"loss": 1.3975,
"step": 4680
},
{
"epoch": 3.5,
"learning_rate": 4.906438576094779e-05,
"loss": 1.4243,
"step": 4690
},
{
"epoch": 3.51,
"learning_rate": 4.9050545313624544e-05,
"loss": 1.4256,
"step": 4700
},
{
"epoch": 3.52,
"learning_rate": 4.903670486630128e-05,
"loss": 1.3872,
"step": 4710
},
{
"epoch": 3.53,
"learning_rate": 4.9022864418978026e-05,
"loss": 1.4723,
"step": 4720
},
{
"epoch": 3.53,
"learning_rate": 4.9009023971654764e-05,
"loss": 1.4494,
"step": 4730
},
{
"epoch": 3.54,
"learning_rate": 4.899518352433151e-05,
"loss": 1.4269,
"step": 4740
},
{
"epoch": 3.55,
"learning_rate": 4.8981343077008254e-05,
"loss": 1.3806,
"step": 4750
},
{
"epoch": 3.56,
"learning_rate": 4.896750262968499e-05,
"loss": 1.4375,
"step": 4760
},
{
"epoch": 3.56,
"learning_rate": 4.8953662182361736e-05,
"loss": 1.4542,
"step": 4770
},
{
"epoch": 3.57,
"learning_rate": 4.8939821735038474e-05,
"loss": 1.4699,
"step": 4780
},
{
"epoch": 3.58,
"learning_rate": 4.8925981287715225e-05,
"loss": 1.4409,
"step": 4790
},
{
"epoch": 3.59,
"learning_rate": 4.891214084039196e-05,
"loss": 1.404,
"step": 4800
},
{
"epoch": 3.59,
"learning_rate": 4.889830039306871e-05,
"loss": 1.3994,
"step": 4810
},
{
"epoch": 3.6,
"learning_rate": 4.8884459945745446e-05,
"loss": 1.4465,
"step": 4820
},
{
"epoch": 3.61,
"learning_rate": 4.887061949842219e-05,
"loss": 1.4178,
"step": 4830
},
{
"epoch": 3.62,
"learning_rate": 4.8856779051098935e-05,
"loss": 1.4191,
"step": 4840
},
{
"epoch": 3.62,
"learning_rate": 4.884293860377568e-05,
"loss": 1.4385,
"step": 4850
},
{
"epoch": 3.63,
"learning_rate": 4.882909815645242e-05,
"loss": 1.3739,
"step": 4860
},
{
"epoch": 3.64,
"learning_rate": 4.881525770912916e-05,
"loss": 1.3938,
"step": 4870
},
{
"epoch": 3.65,
"learning_rate": 4.880141726180591e-05,
"loss": 1.4061,
"step": 4880
},
{
"epoch": 3.65,
"learning_rate": 4.8787576814482645e-05,
"loss": 1.4232,
"step": 4890
},
{
"epoch": 3.66,
"learning_rate": 4.877373636715939e-05,
"loss": 1.4096,
"step": 4900
},
{
"epoch": 3.67,
"learning_rate": 4.875989591983613e-05,
"loss": 1.4317,
"step": 4910
},
{
"epoch": 3.68,
"learning_rate": 4.874605547251287e-05,
"loss": 1.4204,
"step": 4920
},
{
"epoch": 3.68,
"learning_rate": 4.8732215025189616e-05,
"loss": 1.4,
"step": 4930
},
{
"epoch": 3.69,
"learning_rate": 4.871837457786636e-05,
"loss": 1.4552,
"step": 4940
},
{
"epoch": 3.7,
"learning_rate": 4.87045341305431e-05,
"loss": 1.4379,
"step": 4950
},
{
"epoch": 3.71,
"learning_rate": 4.8690693683219844e-05,
"loss": 1.424,
"step": 4960
},
{
"epoch": 3.71,
"learning_rate": 4.867685323589659e-05,
"loss": 1.349,
"step": 4970
},
{
"epoch": 3.72,
"learning_rate": 4.866301278857333e-05,
"loss": 1.4477,
"step": 4980
},
{
"epoch": 3.73,
"learning_rate": 4.864917234125007e-05,
"loss": 1.3998,
"step": 4990
},
{
"epoch": 3.74,
"learning_rate": 4.8635331893926815e-05,
"loss": 1.4161,
"step": 5000
},
{
"epoch": 3.74,
"learning_rate": 4.862149144660355e-05,
"loss": 1.4186,
"step": 5010
},
{
"epoch": 3.75,
"learning_rate": 4.86076509992803e-05,
"loss": 1.3769,
"step": 5020
},
{
"epoch": 3.76,
"learning_rate": 4.859381055195704e-05,
"loss": 1.414,
"step": 5030
},
{
"epoch": 3.77,
"learning_rate": 4.857997010463378e-05,
"loss": 1.4173,
"step": 5040
},
{
"epoch": 3.77,
"learning_rate": 4.8566129657310525e-05,
"loss": 1.4614,
"step": 5050
},
{
"epoch": 3.78,
"learning_rate": 4.855228920998727e-05,
"loss": 1.4108,
"step": 5060
},
{
"epoch": 3.79,
"learning_rate": 4.8538448762664014e-05,
"loss": 1.4038,
"step": 5070
},
{
"epoch": 3.8,
"learning_rate": 4.852460831534075e-05,
"loss": 1.4466,
"step": 5080
},
{
"epoch": 3.8,
"learning_rate": 4.85107678680175e-05,
"loss": 1.4087,
"step": 5090
},
{
"epoch": 3.81,
"learning_rate": 4.849692742069424e-05,
"loss": 1.4341,
"step": 5100
},
{
"epoch": 3.82,
"learning_rate": 4.8483086973370986e-05,
"loss": 1.4369,
"step": 5110
},
{
"epoch": 3.83,
"learning_rate": 4.8469246526047724e-05,
"loss": 1.4214,
"step": 5120
},
{
"epoch": 3.83,
"learning_rate": 4.845540607872447e-05,
"loss": 1.4008,
"step": 5130
},
{
"epoch": 3.84,
"learning_rate": 4.8441565631401207e-05,
"loss": 1.4439,
"step": 5140
},
{
"epoch": 3.85,
"learning_rate": 4.842772518407795e-05,
"loss": 1.3974,
"step": 5150
},
{
"epoch": 3.86,
"learning_rate": 4.8413884736754696e-05,
"loss": 1.4179,
"step": 5160
},
{
"epoch": 3.86,
"learning_rate": 4.8400044289431434e-05,
"loss": 1.402,
"step": 5170
},
{
"epoch": 3.87,
"learning_rate": 4.838620384210818e-05,
"loss": 1.42,
"step": 5180
},
{
"epoch": 3.88,
"learning_rate": 4.837236339478492e-05,
"loss": 1.3791,
"step": 5190
},
{
"epoch": 3.88,
"learning_rate": 4.835852294746167e-05,
"loss": 1.3857,
"step": 5200
},
{
"epoch": 3.89,
"learning_rate": 4.8344682500138405e-05,
"loss": 1.4019,
"step": 5210
},
{
"epoch": 3.9,
"learning_rate": 4.833084205281515e-05,
"loss": 1.4408,
"step": 5220
},
{
"epoch": 3.91,
"learning_rate": 4.831700160549189e-05,
"loss": 1.3808,
"step": 5230
},
{
"epoch": 3.91,
"learning_rate": 4.830316115816864e-05,
"loss": 1.4029,
"step": 5240
},
{
"epoch": 3.92,
"learning_rate": 4.828932071084538e-05,
"loss": 1.4293,
"step": 5250
},
{
"epoch": 3.93,
"learning_rate": 4.827548026352212e-05,
"loss": 1.4492,
"step": 5260
},
{
"epoch": 3.94,
"learning_rate": 4.826163981619886e-05,
"loss": 1.453,
"step": 5270
},
{
"epoch": 3.94,
"learning_rate": 4.8247799368875604e-05,
"loss": 1.4284,
"step": 5280
},
{
"epoch": 3.95,
"learning_rate": 4.823395892155235e-05,
"loss": 1.4285,
"step": 5290
},
{
"epoch": 3.96,
"learning_rate": 4.822011847422909e-05,
"loss": 1.4237,
"step": 5300
},
{
"epoch": 3.97,
"learning_rate": 4.820627802690583e-05,
"loss": 1.4279,
"step": 5310
},
{
"epoch": 3.97,
"learning_rate": 4.819243757958257e-05,
"loss": 1.4248,
"step": 5320
},
{
"epoch": 3.98,
"learning_rate": 4.817859713225932e-05,
"loss": 1.373,
"step": 5330
},
{
"epoch": 3.99,
"learning_rate": 4.816475668493606e-05,
"loss": 1.4248,
"step": 5340
},
{
"epoch": 4.0,
"learning_rate": 4.81509162376128e-05,
"loss": 1.4538,
"step": 5350
},
{
"epoch": 4.0,
"eval_accuracy": 0.543253322128263,
"eval_loss": 1.0751378536224365,
"eval_runtime": 77.824,
"eval_samples_per_second": 244.642,
"eval_steps_per_second": 7.645,
"step": 5354
},
{
"epoch": 4.0,
"learning_rate": 4.813707579028954e-05,
"loss": 1.4419,
"step": 5360
},
{
"epoch": 4.01,
"learning_rate": 4.8123235342966286e-05,
"loss": 1.4518,
"step": 5370
},
{
"epoch": 4.02,
"learning_rate": 4.810939489564303e-05,
"loss": 1.4032,
"step": 5380
},
{
"epoch": 4.03,
"learning_rate": 4.8095554448319775e-05,
"loss": 1.3933,
"step": 5390
},
{
"epoch": 4.03,
"learning_rate": 4.808171400099651e-05,
"loss": 1.4208,
"step": 5400
},
{
"epoch": 4.04,
"learning_rate": 4.806787355367326e-05,
"loss": 1.35,
"step": 5410
},
{
"epoch": 4.05,
"learning_rate": 4.805403310635e-05,
"loss": 1.4053,
"step": 5420
},
{
"epoch": 4.06,
"learning_rate": 4.804019265902675e-05,
"loss": 1.4478,
"step": 5430
},
{
"epoch": 4.06,
"learning_rate": 4.8026352211703485e-05,
"loss": 1.4175,
"step": 5440
},
{
"epoch": 4.07,
"learning_rate": 4.801251176438022e-05,
"loss": 1.4047,
"step": 5450
},
{
"epoch": 4.08,
"learning_rate": 4.799867131705697e-05,
"loss": 1.3586,
"step": 5460
},
{
"epoch": 4.09,
"learning_rate": 4.798483086973371e-05,
"loss": 1.4391,
"step": 5470
},
{
"epoch": 4.09,
"learning_rate": 4.7970990422410457e-05,
"loss": 1.4011,
"step": 5480
},
{
"epoch": 4.1,
"learning_rate": 4.7957149975087194e-05,
"loss": 1.3836,
"step": 5490
},
{
"epoch": 4.11,
"learning_rate": 4.794330952776394e-05,
"loss": 1.3959,
"step": 5500
},
{
"epoch": 4.12,
"learning_rate": 4.7929469080440684e-05,
"loss": 1.4236,
"step": 5510
},
{
"epoch": 4.12,
"learning_rate": 4.791562863311743e-05,
"loss": 1.3827,
"step": 5520
},
{
"epoch": 4.13,
"learning_rate": 4.7901788185794166e-05,
"loss": 1.3631,
"step": 5530
},
{
"epoch": 4.14,
"learning_rate": 4.788794773847091e-05,
"loss": 1.4077,
"step": 5540
},
{
"epoch": 4.15,
"learning_rate": 4.787410729114765e-05,
"loss": 1.38,
"step": 5550
},
{
"epoch": 4.15,
"learning_rate": 4.78602668438244e-05,
"loss": 1.4058,
"step": 5560
},
{
"epoch": 4.16,
"learning_rate": 4.784642639650114e-05,
"loss": 1.5071,
"step": 5570
},
{
"epoch": 4.17,
"learning_rate": 4.7832585949177876e-05,
"loss": 1.4314,
"step": 5580
},
{
"epoch": 4.18,
"learning_rate": 4.781874550185462e-05,
"loss": 1.3996,
"step": 5590
},
{
"epoch": 4.18,
"learning_rate": 4.7804905054531365e-05,
"loss": 1.4102,
"step": 5600
},
{
"epoch": 4.19,
"learning_rate": 4.779106460720811e-05,
"loss": 1.358,
"step": 5610
},
{
"epoch": 4.2,
"learning_rate": 4.777722415988485e-05,
"loss": 1.3838,
"step": 5620
},
{
"epoch": 4.21,
"learning_rate": 4.776338371256159e-05,
"loss": 1.3882,
"step": 5630
},
{
"epoch": 4.21,
"learning_rate": 4.774954326523833e-05,
"loss": 1.3946,
"step": 5640
},
{
"epoch": 4.22,
"learning_rate": 4.773570281791508e-05,
"loss": 1.4307,
"step": 5650
},
{
"epoch": 4.23,
"learning_rate": 4.772186237059182e-05,
"loss": 1.4147,
"step": 5660
},
{
"epoch": 4.24,
"learning_rate": 4.7708021923268564e-05,
"loss": 1.3853,
"step": 5670
},
{
"epoch": 4.24,
"learning_rate": 4.76941814759453e-05,
"loss": 1.431,
"step": 5680
},
{
"epoch": 4.25,
"learning_rate": 4.768034102862205e-05,
"loss": 1.429,
"step": 5690
},
{
"epoch": 4.26,
"learning_rate": 4.766650058129879e-05,
"loss": 1.4208,
"step": 5700
},
{
"epoch": 4.27,
"learning_rate": 4.765266013397553e-05,
"loss": 1.4484,
"step": 5710
},
{
"epoch": 4.27,
"learning_rate": 4.7638819686652274e-05,
"loss": 1.4017,
"step": 5720
},
{
"epoch": 4.28,
"learning_rate": 4.762497923932902e-05,
"loss": 1.3968,
"step": 5730
},
{
"epoch": 4.29,
"learning_rate": 4.761113879200576e-05,
"loss": 1.416,
"step": 5740
},
{
"epoch": 4.3,
"learning_rate": 4.75972983446825e-05,
"loss": 1.4102,
"step": 5750
},
{
"epoch": 4.3,
"learning_rate": 4.7583457897359246e-05,
"loss": 1.3917,
"step": 5760
},
{
"epoch": 4.31,
"learning_rate": 4.7569617450035983e-05,
"loss": 1.392,
"step": 5770
},
{
"epoch": 4.32,
"learning_rate": 4.755577700271273e-05,
"loss": 1.3988,
"step": 5780
},
{
"epoch": 4.33,
"learning_rate": 4.754193655538947e-05,
"loss": 1.4231,
"step": 5790
},
{
"epoch": 4.33,
"learning_rate": 4.752809610806622e-05,
"loss": 1.4151,
"step": 5800
},
{
"epoch": 4.34,
"learning_rate": 4.7514255660742955e-05,
"loss": 1.3657,
"step": 5810
},
{
"epoch": 4.35,
"learning_rate": 4.75004152134197e-05,
"loss": 1.3995,
"step": 5820
},
{
"epoch": 4.36,
"learning_rate": 4.7486574766096445e-05,
"loss": 1.3614,
"step": 5830
},
{
"epoch": 4.36,
"learning_rate": 4.747273431877319e-05,
"loss": 1.3963,
"step": 5840
},
{
"epoch": 4.37,
"learning_rate": 4.745889387144993e-05,
"loss": 1.3631,
"step": 5850
},
{
"epoch": 4.38,
"learning_rate": 4.7445053424126665e-05,
"loss": 1.3969,
"step": 5860
},
{
"epoch": 4.39,
"learning_rate": 4.7431212976803416e-05,
"loss": 1.3671,
"step": 5870
},
{
"epoch": 4.39,
"learning_rate": 4.7417372529480154e-05,
"loss": 1.4448,
"step": 5880
},
{
"epoch": 4.4,
"learning_rate": 4.74035320821569e-05,
"loss": 1.4079,
"step": 5890
},
{
"epoch": 4.41,
"learning_rate": 4.738969163483364e-05,
"loss": 1.4098,
"step": 5900
},
{
"epoch": 4.42,
"learning_rate": 4.737585118751038e-05,
"loss": 1.3793,
"step": 5910
},
{
"epoch": 4.42,
"learning_rate": 4.7362010740187126e-05,
"loss": 1.4664,
"step": 5920
},
{
"epoch": 4.43,
"learning_rate": 4.734817029286387e-05,
"loss": 1.4201,
"step": 5930
},
{
"epoch": 4.44,
"learning_rate": 4.733432984554061e-05,
"loss": 1.4247,
"step": 5940
},
{
"epoch": 4.45,
"learning_rate": 4.732048939821735e-05,
"loss": 1.4489,
"step": 5950
},
{
"epoch": 4.45,
"learning_rate": 4.73066489508941e-05,
"loss": 1.4406,
"step": 5960
},
{
"epoch": 4.46,
"learning_rate": 4.729280850357084e-05,
"loss": 1.384,
"step": 5970
},
{
"epoch": 4.47,
"learning_rate": 4.727896805624758e-05,
"loss": 1.385,
"step": 5980
},
{
"epoch": 4.48,
"learning_rate": 4.726512760892432e-05,
"loss": 1.4394,
"step": 5990
},
{
"epoch": 4.48,
"learning_rate": 4.725128716160106e-05,
"loss": 1.4201,
"step": 6000
},
{
"epoch": 4.49,
"learning_rate": 4.723744671427781e-05,
"loss": 1.4021,
"step": 6010
},
{
"epoch": 4.5,
"learning_rate": 4.722360626695455e-05,
"loss": 1.43,
"step": 6020
},
{
"epoch": 4.51,
"learning_rate": 4.720976581963129e-05,
"loss": 1.428,
"step": 6030
},
{
"epoch": 4.51,
"learning_rate": 4.7195925372308035e-05,
"loss": 1.4432,
"step": 6040
},
{
"epoch": 4.52,
"learning_rate": 4.718208492498478e-05,
"loss": 1.392,
"step": 6050
},
{
"epoch": 4.53,
"learning_rate": 4.7168244477661524e-05,
"loss": 1.4106,
"step": 6060
},
{
"epoch": 4.53,
"learning_rate": 4.715440403033826e-05,
"loss": 1.3935,
"step": 6070
},
{
"epoch": 4.54,
"learning_rate": 4.7140563583015006e-05,
"loss": 1.4493,
"step": 6080
},
{
"epoch": 4.55,
"learning_rate": 4.7126723135691744e-05,
"loss": 1.3812,
"step": 6090
},
{
"epoch": 4.56,
"learning_rate": 4.7112882688368496e-05,
"loss": 1.3947,
"step": 6100
},
{
"epoch": 4.56,
"learning_rate": 4.7099042241045234e-05,
"loss": 1.3994,
"step": 6110
},
{
"epoch": 4.57,
"learning_rate": 4.708520179372198e-05,
"loss": 1.3995,
"step": 6120
},
{
"epoch": 4.58,
"learning_rate": 4.7071361346398716e-05,
"loss": 1.3578,
"step": 6130
},
{
"epoch": 4.59,
"learning_rate": 4.705752089907546e-05,
"loss": 1.4116,
"step": 6140
},
{
"epoch": 4.59,
"learning_rate": 4.7043680451752205e-05,
"loss": 1.4017,
"step": 6150
},
{
"epoch": 4.6,
"learning_rate": 4.702984000442894e-05,
"loss": 1.4036,
"step": 6160
},
{
"epoch": 4.61,
"learning_rate": 4.701599955710569e-05,
"loss": 1.4048,
"step": 6170
},
{
"epoch": 4.62,
"learning_rate": 4.7002159109782426e-05,
"loss": 1.3771,
"step": 6180
},
{
"epoch": 4.62,
"learning_rate": 4.698831866245918e-05,
"loss": 1.3827,
"step": 6190
},
{
"epoch": 4.63,
"learning_rate": 4.6974478215135915e-05,
"loss": 1.3996,
"step": 6200
},
{
"epoch": 4.64,
"learning_rate": 4.696063776781266e-05,
"loss": 1.4376,
"step": 6210
},
{
"epoch": 4.65,
"learning_rate": 4.69467973204894e-05,
"loss": 1.4509,
"step": 6220
},
{
"epoch": 4.65,
"learning_rate": 4.693295687316614e-05,
"loss": 1.3885,
"step": 6230
},
{
"epoch": 4.66,
"learning_rate": 4.691911642584289e-05,
"loss": 1.3977,
"step": 6240
},
{
"epoch": 4.67,
"learning_rate": 4.690527597851963e-05,
"loss": 1.4068,
"step": 6250
},
{
"epoch": 4.68,
"learning_rate": 4.689143553119637e-05,
"loss": 1.4441,
"step": 6260
},
{
"epoch": 4.68,
"learning_rate": 4.687759508387311e-05,
"loss": 1.4007,
"step": 6270
},
{
"epoch": 4.69,
"learning_rate": 4.686375463654986e-05,
"loss": 1.424,
"step": 6280
},
{
"epoch": 4.7,
"learning_rate": 4.6849914189226596e-05,
"loss": 1.3743,
"step": 6290
},
{
"epoch": 4.71,
"learning_rate": 4.683607374190334e-05,
"loss": 1.3881,
"step": 6300
},
{
"epoch": 4.71,
"learning_rate": 4.682223329458008e-05,
"loss": 1.3418,
"step": 6310
},
{
"epoch": 4.72,
"learning_rate": 4.6808392847256824e-05,
"loss": 1.3846,
"step": 6320
},
{
"epoch": 4.73,
"learning_rate": 4.679455239993357e-05,
"loss": 1.3643,
"step": 6330
},
{
"epoch": 4.74,
"learning_rate": 4.678071195261031e-05,
"loss": 1.3564,
"step": 6340
},
{
"epoch": 4.74,
"learning_rate": 4.676687150528705e-05,
"loss": 1.4053,
"step": 6350
},
{
"epoch": 4.75,
"learning_rate": 4.6753031057963795e-05,
"loss": 1.3844,
"step": 6360
},
{
"epoch": 4.76,
"learning_rate": 4.673919061064054e-05,
"loss": 1.4126,
"step": 6370
},
{
"epoch": 4.77,
"learning_rate": 4.6725350163317285e-05,
"loss": 1.4251,
"step": 6380
},
{
"epoch": 4.77,
"learning_rate": 4.671150971599402e-05,
"loss": 1.4588,
"step": 6390
},
{
"epoch": 4.78,
"learning_rate": 4.669766926867077e-05,
"loss": 1.3842,
"step": 6400
},
{
"epoch": 4.79,
"learning_rate": 4.6683828821347505e-05,
"loss": 1.3918,
"step": 6410
},
{
"epoch": 4.8,
"learning_rate": 4.666998837402425e-05,
"loss": 1.3734,
"step": 6420
},
{
"epoch": 4.8,
"learning_rate": 4.6656147926700994e-05,
"loss": 1.3982,
"step": 6430
},
{
"epoch": 4.81,
"learning_rate": 4.664230747937773e-05,
"loss": 1.4145,
"step": 6440
},
{
"epoch": 4.82,
"learning_rate": 4.662846703205448e-05,
"loss": 1.4139,
"step": 6450
},
{
"epoch": 4.83,
"learning_rate": 4.661462658473122e-05,
"loss": 1.3921,
"step": 6460
},
{
"epoch": 4.83,
"learning_rate": 4.6600786137407966e-05,
"loss": 1.401,
"step": 6470
},
{
"epoch": 4.84,
"learning_rate": 4.6586945690084704e-05,
"loss": 1.4353,
"step": 6480
},
{
"epoch": 4.85,
"learning_rate": 4.657310524276145e-05,
"loss": 1.4499,
"step": 6490
},
{
"epoch": 4.86,
"learning_rate": 4.655926479543819e-05,
"loss": 1.3757,
"step": 6500
},
{
"epoch": 4.86,
"learning_rate": 4.654542434811494e-05,
"loss": 1.3756,
"step": 6510
},
{
"epoch": 4.87,
"learning_rate": 4.6531583900791676e-05,
"loss": 1.3971,
"step": 6520
},
{
"epoch": 4.88,
"learning_rate": 4.651774345346842e-05,
"loss": 1.4116,
"step": 6530
},
{
"epoch": 4.89,
"learning_rate": 4.650390300614516e-05,
"loss": 1.4013,
"step": 6540
},
{
"epoch": 4.89,
"learning_rate": 4.64900625588219e-05,
"loss": 1.3837,
"step": 6550
},
{
"epoch": 4.9,
"learning_rate": 4.647622211149865e-05,
"loss": 1.3922,
"step": 6560
},
{
"epoch": 4.91,
"learning_rate": 4.6462381664175385e-05,
"loss": 1.4027,
"step": 6570
},
{
"epoch": 4.92,
"learning_rate": 4.644854121685213e-05,
"loss": 1.3823,
"step": 6580
},
{
"epoch": 4.92,
"learning_rate": 4.6434700769528875e-05,
"loss": 1.4029,
"step": 6590
},
{
"epoch": 4.93,
"learning_rate": 4.642086032220562e-05,
"loss": 1.3945,
"step": 6600
},
{
"epoch": 4.94,
"learning_rate": 4.640701987488236e-05,
"loss": 1.4647,
"step": 6610
},
{
"epoch": 4.95,
"learning_rate": 4.63931794275591e-05,
"loss": 1.4008,
"step": 6620
},
{
"epoch": 4.95,
"learning_rate": 4.637933898023584e-05,
"loss": 1.3769,
"step": 6630
},
{
"epoch": 4.96,
"learning_rate": 4.636549853291259e-05,
"loss": 1.4173,
"step": 6640
},
{
"epoch": 4.97,
"learning_rate": 4.635165808558933e-05,
"loss": 1.403,
"step": 6650
},
{
"epoch": 4.98,
"learning_rate": 4.6337817638266074e-05,
"loss": 1.4031,
"step": 6660
},
{
"epoch": 4.98,
"learning_rate": 4.632397719094281e-05,
"loss": 1.4162,
"step": 6670
},
{
"epoch": 4.99,
"learning_rate": 4.6310136743619556e-05,
"loss": 1.3698,
"step": 6680
},
{
"epoch": 5.0,
"learning_rate": 4.62962962962963e-05,
"loss": 1.3928,
"step": 6690
},
{
"epoch": 5.0,
"eval_accuracy": 0.543988654866327,
"eval_loss": 1.0603728294372559,
"eval_runtime": 70.717,
"eval_samples_per_second": 269.228,
"eval_steps_per_second": 8.414,
"step": 6692
},
{
"epoch": 5.01,
"learning_rate": 4.628245584897304e-05,
"loss": 1.3706,
"step": 6700
},
{
"epoch": 5.01,
"learning_rate": 4.626861540164978e-05,
"loss": 1.3631,
"step": 6710
},
{
"epoch": 5.02,
"learning_rate": 4.625477495432652e-05,
"loss": 1.3879,
"step": 6720
},
{
"epoch": 5.03,
"learning_rate": 4.624093450700327e-05,
"loss": 1.3838,
"step": 6730
},
{
"epoch": 5.04,
"learning_rate": 4.622709405968001e-05,
"loss": 1.3358,
"step": 6740
},
{
"epoch": 5.04,
"learning_rate": 4.6213253612356755e-05,
"loss": 1.4095,
"step": 6750
},
{
"epoch": 5.05,
"learning_rate": 4.619941316503349e-05,
"loss": 1.4021,
"step": 6760
},
{
"epoch": 5.06,
"learning_rate": 4.618557271771024e-05,
"loss": 1.428,
"step": 6770
},
{
"epoch": 5.07,
"learning_rate": 4.617173227038698e-05,
"loss": 1.3986,
"step": 6780
},
{
"epoch": 5.07,
"learning_rate": 4.615789182306373e-05,
"loss": 1.4323,
"step": 6790
},
{
"epoch": 5.08,
"learning_rate": 4.6144051375740465e-05,
"loss": 1.412,
"step": 6800
},
{
"epoch": 5.09,
"learning_rate": 4.613021092841721e-05,
"loss": 1.3746,
"step": 6810
},
{
"epoch": 5.1,
"learning_rate": 4.6116370481093954e-05,
"loss": 1.4157,
"step": 6820
},
{
"epoch": 5.1,
"learning_rate": 4.610253003377069e-05,
"loss": 1.3875,
"step": 6830
},
{
"epoch": 5.11,
"learning_rate": 4.6088689586447437e-05,
"loss": 1.374,
"step": 6840
},
{
"epoch": 5.12,
"learning_rate": 4.6074849139124174e-05,
"loss": 1.4234,
"step": 6850
},
{
"epoch": 5.13,
"learning_rate": 4.606100869180092e-05,
"loss": 1.3691,
"step": 6860
},
{
"epoch": 5.13,
"learning_rate": 4.6047168244477664e-05,
"loss": 1.4133,
"step": 6870
},
{
"epoch": 5.14,
"learning_rate": 4.603332779715441e-05,
"loss": 1.3817,
"step": 6880
},
{
"epoch": 5.15,
"learning_rate": 4.6019487349831146e-05,
"loss": 1.4001,
"step": 6890
},
{
"epoch": 5.16,
"learning_rate": 4.600564690250789e-05,
"loss": 1.3531,
"step": 6900
},
{
"epoch": 5.16,
"learning_rate": 4.5991806455184635e-05,
"loss": 1.4091,
"step": 6910
},
{
"epoch": 5.17,
"learning_rate": 4.597796600786138e-05,
"loss": 1.3751,
"step": 6920
},
{
"epoch": 5.18,
"learning_rate": 4.596412556053812e-05,
"loss": 1.4065,
"step": 6930
},
{
"epoch": 5.18,
"learning_rate": 4.595028511321486e-05,
"loss": 1.3865,
"step": 6940
},
{
"epoch": 5.19,
"learning_rate": 4.59364446658916e-05,
"loss": 1.3545,
"step": 6950
},
{
"epoch": 5.2,
"learning_rate": 4.5922604218568345e-05,
"loss": 1.3887,
"step": 6960
},
{
"epoch": 5.21,
"learning_rate": 4.590876377124509e-05,
"loss": 1.3833,
"step": 6970
},
{
"epoch": 5.21,
"learning_rate": 4.589492332392183e-05,
"loss": 1.4187,
"step": 6980
},
{
"epoch": 5.22,
"learning_rate": 4.588108287659857e-05,
"loss": 1.3917,
"step": 6990
},
{
"epoch": 5.23,
"learning_rate": 4.586724242927532e-05,
"loss": 1.36,
"step": 7000
},
{
"epoch": 5.24,
"learning_rate": 4.585340198195206e-05,
"loss": 1.4171,
"step": 7010
},
{
"epoch": 5.24,
"learning_rate": 4.58395615346288e-05,
"loss": 1.3811,
"step": 7020
},
{
"epoch": 5.25,
"learning_rate": 4.5825721087305544e-05,
"loss": 1.419,
"step": 7030
},
{
"epoch": 5.26,
"learning_rate": 4.581188063998228e-05,
"loss": 1.3662,
"step": 7040
},
{
"epoch": 5.27,
"learning_rate": 4.579804019265903e-05,
"loss": 1.4039,
"step": 7050
},
{
"epoch": 5.27,
"learning_rate": 4.578419974533577e-05,
"loss": 1.4253,
"step": 7060
},
{
"epoch": 5.28,
"learning_rate": 4.5770359298012516e-05,
"loss": 1.3846,
"step": 7070
},
{
"epoch": 5.29,
"learning_rate": 4.5756518850689254e-05,
"loss": 1.386,
"step": 7080
},
{
"epoch": 5.3,
"learning_rate": 4.5742678403366e-05,
"loss": 1.4052,
"step": 7090
},
{
"epoch": 5.3,
"learning_rate": 4.572883795604274e-05,
"loss": 1.393,
"step": 7100
},
{
"epoch": 5.31,
"learning_rate": 4.571499750871948e-05,
"loss": 1.3616,
"step": 7110
},
{
"epoch": 5.32,
"learning_rate": 4.5701157061396226e-05,
"loss": 1.3648,
"step": 7120
},
{
"epoch": 5.33,
"learning_rate": 4.568731661407297e-05,
"loss": 1.407,
"step": 7130
},
{
"epoch": 5.33,
"learning_rate": 4.5673476166749715e-05,
"loss": 1.365,
"step": 7140
},
{
"epoch": 5.34,
"learning_rate": 4.565963571942645e-05,
"loss": 1.4256,
"step": 7150
},
{
"epoch": 5.35,
"learning_rate": 4.56457952721032e-05,
"loss": 1.4221,
"step": 7160
},
{
"epoch": 5.36,
"learning_rate": 4.5631954824779935e-05,
"loss": 1.3765,
"step": 7170
},
{
"epoch": 5.36,
"learning_rate": 4.5618114377456687e-05,
"loss": 1.4002,
"step": 7180
},
{
"epoch": 5.37,
"learning_rate": 4.5604273930133424e-05,
"loss": 1.4043,
"step": 7190
},
{
"epoch": 5.38,
"learning_rate": 4.559043348281017e-05,
"loss": 1.3603,
"step": 7200
},
{
"epoch": 5.39,
"learning_rate": 4.557659303548691e-05,
"loss": 1.3625,
"step": 7210
},
{
"epoch": 5.39,
"learning_rate": 4.556275258816365e-05,
"loss": 1.3209,
"step": 7220
},
{
"epoch": 5.4,
"learning_rate": 4.5548912140840396e-05,
"loss": 1.3886,
"step": 7230
},
{
"epoch": 5.41,
"learning_rate": 4.5535071693517134e-05,
"loss": 1.3497,
"step": 7240
},
{
"epoch": 5.42,
"learning_rate": 4.552123124619388e-05,
"loss": 1.3586,
"step": 7250
},
{
"epoch": 5.42,
"learning_rate": 4.550739079887062e-05,
"loss": 1.3899,
"step": 7260
},
{
"epoch": 5.43,
"learning_rate": 4.549355035154737e-05,
"loss": 1.409,
"step": 7270
},
{
"epoch": 5.44,
"learning_rate": 4.5479709904224106e-05,
"loss": 1.3601,
"step": 7280
},
{
"epoch": 5.45,
"learning_rate": 4.546586945690085e-05,
"loss": 1.4075,
"step": 7290
},
{
"epoch": 5.45,
"learning_rate": 4.545202900957759e-05,
"loss": 1.381,
"step": 7300
},
{
"epoch": 5.46,
"learning_rate": 4.543818856225433e-05,
"loss": 1.3739,
"step": 7310
},
{
"epoch": 5.47,
"learning_rate": 4.542434811493108e-05,
"loss": 1.3767,
"step": 7320
},
{
"epoch": 5.48,
"learning_rate": 4.541050766760782e-05,
"loss": 1.4156,
"step": 7330
},
{
"epoch": 5.48,
"learning_rate": 4.539666722028456e-05,
"loss": 1.3611,
"step": 7340
},
{
"epoch": 5.49,
"learning_rate": 4.5382826772961305e-05,
"loss": 1.3651,
"step": 7350
},
{
"epoch": 5.5,
"learning_rate": 4.536898632563805e-05,
"loss": 1.3977,
"step": 7360
},
{
"epoch": 5.51,
"learning_rate": 4.5355145878314794e-05,
"loss": 1.3529,
"step": 7370
},
{
"epoch": 5.51,
"learning_rate": 4.534130543099153e-05,
"loss": 1.3908,
"step": 7380
},
{
"epoch": 5.52,
"learning_rate": 4.532746498366827e-05,
"loss": 1.3684,
"step": 7390
},
{
"epoch": 5.53,
"learning_rate": 4.5313624536345015e-05,
"loss": 1.3919,
"step": 7400
},
{
"epoch": 5.54,
"learning_rate": 4.529978408902176e-05,
"loss": 1.4069,
"step": 7410
},
{
"epoch": 5.54,
"learning_rate": 4.5285943641698504e-05,
"loss": 1.3864,
"step": 7420
},
{
"epoch": 5.55,
"learning_rate": 4.527210319437524e-05,
"loss": 1.4244,
"step": 7430
},
{
"epoch": 5.56,
"learning_rate": 4.5258262747051986e-05,
"loss": 1.3648,
"step": 7440
},
{
"epoch": 5.57,
"learning_rate": 4.524442229972873e-05,
"loss": 1.3572,
"step": 7450
},
{
"epoch": 5.57,
"learning_rate": 4.5230581852405476e-05,
"loss": 1.3791,
"step": 7460
},
{
"epoch": 5.58,
"learning_rate": 4.5216741405082213e-05,
"loss": 1.4431,
"step": 7470
},
{
"epoch": 5.59,
"learning_rate": 4.520290095775896e-05,
"loss": 1.4064,
"step": 7480
},
{
"epoch": 5.6,
"learning_rate": 4.5189060510435696e-05,
"loss": 1.3902,
"step": 7490
},
{
"epoch": 5.6,
"learning_rate": 4.517522006311245e-05,
"loss": 1.3757,
"step": 7500
},
{
"epoch": 5.61,
"learning_rate": 4.5161379615789185e-05,
"loss": 1.3674,
"step": 7510
},
{
"epoch": 5.62,
"learning_rate": 4.514753916846592e-05,
"loss": 1.3832,
"step": 7520
},
{
"epoch": 5.63,
"learning_rate": 4.513369872114267e-05,
"loss": 1.3765,
"step": 7530
},
{
"epoch": 5.63,
"learning_rate": 4.511985827381941e-05,
"loss": 1.3749,
"step": 7540
},
{
"epoch": 5.64,
"learning_rate": 4.510601782649616e-05,
"loss": 1.3988,
"step": 7550
},
{
"epoch": 5.65,
"learning_rate": 4.5092177379172895e-05,
"loss": 1.3992,
"step": 7560
},
{
"epoch": 5.66,
"learning_rate": 4.507833693184964e-05,
"loss": 1.4123,
"step": 7570
},
{
"epoch": 5.66,
"learning_rate": 4.506449648452638e-05,
"loss": 1.3915,
"step": 7580
},
{
"epoch": 5.67,
"learning_rate": 4.505065603720313e-05,
"loss": 1.389,
"step": 7590
},
{
"epoch": 5.68,
"learning_rate": 4.503681558987987e-05,
"loss": 1.352,
"step": 7600
},
{
"epoch": 5.69,
"learning_rate": 4.502297514255661e-05,
"loss": 1.3763,
"step": 7610
},
{
"epoch": 5.69,
"learning_rate": 4.500913469523335e-05,
"loss": 1.3892,
"step": 7620
},
{
"epoch": 5.7,
"learning_rate": 4.4995294247910094e-05,
"loss": 1.3721,
"step": 7630
},
{
"epoch": 5.71,
"learning_rate": 4.498145380058684e-05,
"loss": 1.3932,
"step": 7640
},
{
"epoch": 5.72,
"learning_rate": 4.496761335326358e-05,
"loss": 1.3841,
"step": 7650
},
{
"epoch": 5.72,
"learning_rate": 4.495377290594032e-05,
"loss": 1.3426,
"step": 7660
},
{
"epoch": 5.73,
"learning_rate": 4.4939932458617066e-05,
"loss": 1.3197,
"step": 7670
},
{
"epoch": 5.74,
"learning_rate": 4.492609201129381e-05,
"loss": 1.3901,
"step": 7680
},
{
"epoch": 5.75,
"learning_rate": 4.491225156397055e-05,
"loss": 1.3658,
"step": 7690
},
{
"epoch": 5.75,
"learning_rate": 4.489841111664729e-05,
"loss": 1.4177,
"step": 7700
},
{
"epoch": 5.76,
"learning_rate": 4.488457066932403e-05,
"loss": 1.3531,
"step": 7710
},
{
"epoch": 5.77,
"learning_rate": 4.4870730222000775e-05,
"loss": 1.3763,
"step": 7720
},
{
"epoch": 5.78,
"learning_rate": 4.485688977467752e-05,
"loss": 1.3885,
"step": 7730
},
{
"epoch": 5.78,
"learning_rate": 4.4843049327354265e-05,
"loss": 1.4054,
"step": 7740
},
{
"epoch": 5.79,
"learning_rate": 4.4829208880031e-05,
"loss": 1.4166,
"step": 7750
},
{
"epoch": 5.8,
"learning_rate": 4.481536843270775e-05,
"loss": 1.4284,
"step": 7760
},
{
"epoch": 5.81,
"learning_rate": 4.480152798538449e-05,
"loss": 1.4082,
"step": 7770
},
{
"epoch": 5.81,
"learning_rate": 4.4787687538061236e-05,
"loss": 1.3747,
"step": 7780
},
{
"epoch": 5.82,
"learning_rate": 4.4773847090737974e-05,
"loss": 1.3827,
"step": 7790
},
{
"epoch": 5.83,
"learning_rate": 4.476000664341471e-05,
"loss": 1.3585,
"step": 7800
},
{
"epoch": 5.83,
"learning_rate": 4.4746166196091464e-05,
"loss": 1.3564,
"step": 7810
},
{
"epoch": 5.84,
"learning_rate": 4.47323257487682e-05,
"loss": 1.4271,
"step": 7820
},
{
"epoch": 5.85,
"learning_rate": 4.4718485301444946e-05,
"loss": 1.3757,
"step": 7830
},
{
"epoch": 5.86,
"learning_rate": 4.4704644854121684e-05,
"loss": 1.4002,
"step": 7840
},
{
"epoch": 5.86,
"learning_rate": 4.469080440679843e-05,
"loss": 1.3897,
"step": 7850
},
{
"epoch": 5.87,
"learning_rate": 4.467696395947517e-05,
"loss": 1.3824,
"step": 7860
},
{
"epoch": 5.88,
"learning_rate": 4.466312351215192e-05,
"loss": 1.3381,
"step": 7870
},
{
"epoch": 5.89,
"learning_rate": 4.4649283064828656e-05,
"loss": 1.3818,
"step": 7880
},
{
"epoch": 5.89,
"learning_rate": 4.46354426175054e-05,
"loss": 1.3906,
"step": 7890
},
{
"epoch": 5.9,
"learning_rate": 4.4621602170182145e-05,
"loss": 1.4259,
"step": 7900
},
{
"epoch": 5.91,
"learning_rate": 4.460776172285889e-05,
"loss": 1.4127,
"step": 7910
},
{
"epoch": 5.92,
"learning_rate": 4.459392127553563e-05,
"loss": 1.4012,
"step": 7920
},
{
"epoch": 5.92,
"learning_rate": 4.4580080828212365e-05,
"loss": 1.3853,
"step": 7930
},
{
"epoch": 5.93,
"learning_rate": 4.456624038088911e-05,
"loss": 1.3867,
"step": 7940
},
{
"epoch": 5.94,
"learning_rate": 4.4552399933565855e-05,
"loss": 1.3598,
"step": 7950
},
{
"epoch": 5.95,
"learning_rate": 4.45385594862426e-05,
"loss": 1.3801,
"step": 7960
},
{
"epoch": 5.95,
"learning_rate": 4.452471903891934e-05,
"loss": 1.4468,
"step": 7970
},
{
"epoch": 5.96,
"learning_rate": 4.451087859159608e-05,
"loss": 1.3557,
"step": 7980
},
{
"epoch": 5.97,
"learning_rate": 4.4497038144272826e-05,
"loss": 1.3878,
"step": 7990
},
{
"epoch": 5.98,
"learning_rate": 4.448319769694957e-05,
"loss": 1.3894,
"step": 8000
},
{
"epoch": 5.98,
"learning_rate": 4.446935724962631e-05,
"loss": 1.3972,
"step": 8010
},
{
"epoch": 5.99,
"learning_rate": 4.4455516802303054e-05,
"loss": 1.3723,
"step": 8020
},
{
"epoch": 6.0,
"learning_rate": 4.444167635497979e-05,
"loss": 1.4148,
"step": 8030
},
{
"epoch": 6.0,
"eval_accuracy": 0.5523399338200535,
"eval_loss": 1.0459073781967163,
"eval_runtime": 70.9815,
"eval_samples_per_second": 268.225,
"eval_steps_per_second": 8.382,
"step": 8031
},
{
"epoch": 6.01,
"learning_rate": 4.442783590765654e-05,
"loss": 1.3699,
"step": 8040
},
{
"epoch": 6.01,
"learning_rate": 4.441399546033328e-05,
"loss": 1.3815,
"step": 8050
},
{
"epoch": 6.02,
"learning_rate": 4.4400155013010025e-05,
"loss": 1.364,
"step": 8060
},
{
"epoch": 6.03,
"learning_rate": 4.438631456568676e-05,
"loss": 1.405,
"step": 8070
},
{
"epoch": 6.04,
"learning_rate": 4.437247411836351e-05,
"loss": 1.358,
"step": 8080
},
{
"epoch": 6.04,
"learning_rate": 4.435863367104025e-05,
"loss": 1.4103,
"step": 8090
},
{
"epoch": 6.05,
"learning_rate": 4.434479322371699e-05,
"loss": 1.4115,
"step": 8100
},
{
"epoch": 6.06,
"learning_rate": 4.4330952776393735e-05,
"loss": 1.3507,
"step": 8110
},
{
"epoch": 6.07,
"learning_rate": 4.431711232907047e-05,
"loss": 1.3921,
"step": 8120
},
{
"epoch": 6.07,
"learning_rate": 4.4303271881747224e-05,
"loss": 1.3784,
"step": 8130
},
{
"epoch": 6.08,
"learning_rate": 4.428943143442396e-05,
"loss": 1.295,
"step": 8140
},
{
"epoch": 6.09,
"learning_rate": 4.427559098710071e-05,
"loss": 1.4129,
"step": 8150
},
{
"epoch": 6.1,
"learning_rate": 4.4261750539777445e-05,
"loss": 1.3912,
"step": 8160
},
{
"epoch": 6.1,
"learning_rate": 4.424791009245419e-05,
"loss": 1.357,
"step": 8170
},
{
"epoch": 6.11,
"learning_rate": 4.4234069645130934e-05,
"loss": 1.3902,
"step": 8180
},
{
"epoch": 6.12,
"learning_rate": 4.422022919780768e-05,
"loss": 1.3714,
"step": 8190
},
{
"epoch": 6.13,
"learning_rate": 4.4206388750484416e-05,
"loss": 1.3822,
"step": 8200
},
{
"epoch": 6.13,
"learning_rate": 4.4192548303161154e-05,
"loss": 1.4062,
"step": 8210
},
{
"epoch": 6.14,
"learning_rate": 4.4178707855837906e-05,
"loss": 1.3474,
"step": 8220
},
{
"epoch": 6.15,
"learning_rate": 4.4164867408514644e-05,
"loss": 1.3475,
"step": 8230
},
{
"epoch": 6.16,
"learning_rate": 4.415102696119139e-05,
"loss": 1.3441,
"step": 8240
},
{
"epoch": 6.16,
"learning_rate": 4.4137186513868126e-05,
"loss": 1.4146,
"step": 8250
},
{
"epoch": 6.17,
"learning_rate": 4.412334606654487e-05,
"loss": 1.3527,
"step": 8260
},
{
"epoch": 6.18,
"learning_rate": 4.4109505619221615e-05,
"loss": 1.368,
"step": 8270
},
{
"epoch": 6.19,
"learning_rate": 4.409566517189836e-05,
"loss": 1.3658,
"step": 8280
},
{
"epoch": 6.19,
"learning_rate": 4.40818247245751e-05,
"loss": 1.3803,
"step": 8290
},
{
"epoch": 6.2,
"learning_rate": 4.406798427725184e-05,
"loss": 1.3601,
"step": 8300
},
{
"epoch": 6.21,
"learning_rate": 4.405414382992859e-05,
"loss": 1.3993,
"step": 8310
},
{
"epoch": 6.22,
"learning_rate": 4.404030338260533e-05,
"loss": 1.4092,
"step": 8320
},
{
"epoch": 6.22,
"learning_rate": 4.402646293528207e-05,
"loss": 1.38,
"step": 8330
},
{
"epoch": 6.23,
"learning_rate": 4.4012622487958814e-05,
"loss": 1.386,
"step": 8340
},
{
"epoch": 6.24,
"learning_rate": 4.399878204063555e-05,
"loss": 1.3776,
"step": 8350
},
{
"epoch": 6.25,
"learning_rate": 4.39849415933123e-05,
"loss": 1.3764,
"step": 8360
},
{
"epoch": 6.25,
"learning_rate": 4.397110114598904e-05,
"loss": 1.3801,
"step": 8370
},
{
"epoch": 6.26,
"learning_rate": 4.395726069866578e-05,
"loss": 1.3745,
"step": 8380
},
{
"epoch": 6.27,
"learning_rate": 4.3943420251342524e-05,
"loss": 1.3695,
"step": 8390
},
{
"epoch": 6.28,
"learning_rate": 4.392957980401927e-05,
"loss": 1.4083,
"step": 8400
},
{
"epoch": 6.28,
"learning_rate": 4.391573935669601e-05,
"loss": 1.4108,
"step": 8410
},
{
"epoch": 6.29,
"learning_rate": 4.390189890937275e-05,
"loss": 1.3923,
"step": 8420
},
{
"epoch": 6.3,
"learning_rate": 4.3888058462049496e-05,
"loss": 1.3897,
"step": 8430
},
{
"epoch": 6.31,
"learning_rate": 4.387421801472624e-05,
"loss": 1.3981,
"step": 8440
},
{
"epoch": 6.31,
"learning_rate": 4.3860377567402985e-05,
"loss": 1.3693,
"step": 8450
},
{
"epoch": 6.32,
"learning_rate": 4.384653712007972e-05,
"loss": 1.3615,
"step": 8460
},
{
"epoch": 6.33,
"learning_rate": 4.383269667275647e-05,
"loss": 1.3455,
"step": 8470
},
{
"epoch": 6.34,
"learning_rate": 4.3818856225433205e-05,
"loss": 1.3345,
"step": 8480
},
{
"epoch": 6.34,
"learning_rate": 4.380501577810995e-05,
"loss": 1.396,
"step": 8490
},
{
"epoch": 6.35,
"learning_rate": 4.3791175330786695e-05,
"loss": 1.3169,
"step": 8500
},
{
"epoch": 6.36,
"learning_rate": 4.377733488346343e-05,
"loss": 1.3695,
"step": 8510
},
{
"epoch": 6.37,
"learning_rate": 4.376349443614018e-05,
"loss": 1.3651,
"step": 8520
},
{
"epoch": 6.37,
"learning_rate": 4.374965398881692e-05,
"loss": 1.3947,
"step": 8530
},
{
"epoch": 6.38,
"learning_rate": 4.3735813541493667e-05,
"loss": 1.3795,
"step": 8540
},
{
"epoch": 6.39,
"learning_rate": 4.3721973094170404e-05,
"loss": 1.3883,
"step": 8550
},
{
"epoch": 6.4,
"learning_rate": 4.370813264684715e-05,
"loss": 1.4328,
"step": 8560
},
{
"epoch": 6.4,
"learning_rate": 4.369429219952389e-05,
"loss": 1.4079,
"step": 8570
},
{
"epoch": 6.41,
"learning_rate": 4.368045175220064e-05,
"loss": 1.3472,
"step": 8580
},
{
"epoch": 6.42,
"learning_rate": 4.3666611304877376e-05,
"loss": 1.3836,
"step": 8590
},
{
"epoch": 6.43,
"learning_rate": 4.365277085755412e-05,
"loss": 1.3369,
"step": 8600
},
{
"epoch": 6.43,
"learning_rate": 4.363893041023086e-05,
"loss": 1.346,
"step": 8610
},
{
"epoch": 6.44,
"learning_rate": 4.36250899629076e-05,
"loss": 1.359,
"step": 8620
},
{
"epoch": 6.45,
"learning_rate": 4.361124951558435e-05,
"loss": 1.3328,
"step": 8630
},
{
"epoch": 6.45,
"learning_rate": 4.3597409068261086e-05,
"loss": 1.4035,
"step": 8640
},
{
"epoch": 6.46,
"learning_rate": 4.358356862093783e-05,
"loss": 1.3451,
"step": 8650
},
{
"epoch": 6.47,
"learning_rate": 4.356972817361457e-05,
"loss": 1.3732,
"step": 8660
},
{
"epoch": 6.48,
"learning_rate": 4.355588772629132e-05,
"loss": 1.4163,
"step": 8670
},
{
"epoch": 6.48,
"learning_rate": 4.354204727896806e-05,
"loss": 1.3671,
"step": 8680
},
{
"epoch": 6.49,
"learning_rate": 4.35282068316448e-05,
"loss": 1.3319,
"step": 8690
},
{
"epoch": 6.5,
"learning_rate": 4.351436638432154e-05,
"loss": 1.4192,
"step": 8700
},
{
"epoch": 6.51,
"learning_rate": 4.3500525936998285e-05,
"loss": 1.3791,
"step": 8710
},
{
"epoch": 6.51,
"learning_rate": 4.348668548967503e-05,
"loss": 1.4131,
"step": 8720
},
{
"epoch": 6.52,
"learning_rate": 4.3472845042351774e-05,
"loss": 1.3539,
"step": 8730
},
{
"epoch": 6.53,
"learning_rate": 4.345900459502851e-05,
"loss": 1.4077,
"step": 8740
},
{
"epoch": 6.54,
"learning_rate": 4.3445164147705257e-05,
"loss": 1.3887,
"step": 8750
},
{
"epoch": 6.54,
"learning_rate": 4.3431323700382e-05,
"loss": 1.3855,
"step": 8760
},
{
"epoch": 6.55,
"learning_rate": 4.341748325305874e-05,
"loss": 1.368,
"step": 8770
},
{
"epoch": 6.56,
"learning_rate": 4.3403642805735484e-05,
"loss": 1.4143,
"step": 8780
},
{
"epoch": 6.57,
"learning_rate": 4.338980235841222e-05,
"loss": 1.3663,
"step": 8790
},
{
"epoch": 6.57,
"learning_rate": 4.3375961911088966e-05,
"loss": 1.4196,
"step": 8800
},
{
"epoch": 6.58,
"learning_rate": 4.336212146376571e-05,
"loss": 1.3553,
"step": 8810
},
{
"epoch": 6.59,
"learning_rate": 4.3348281016442456e-05,
"loss": 1.363,
"step": 8820
},
{
"epoch": 6.6,
"learning_rate": 4.3334440569119193e-05,
"loss": 1.3432,
"step": 8830
},
{
"epoch": 6.6,
"learning_rate": 4.332060012179594e-05,
"loss": 1.3703,
"step": 8840
},
{
"epoch": 6.61,
"learning_rate": 4.330675967447268e-05,
"loss": 1.3614,
"step": 8850
},
{
"epoch": 6.62,
"learning_rate": 4.329291922714943e-05,
"loss": 1.357,
"step": 8860
},
{
"epoch": 6.63,
"learning_rate": 4.3279078779826165e-05,
"loss": 1.3625,
"step": 8870
},
{
"epoch": 6.63,
"learning_rate": 4.326523833250291e-05,
"loss": 1.3976,
"step": 8880
},
{
"epoch": 6.64,
"learning_rate": 4.325139788517965e-05,
"loss": 1.3535,
"step": 8890
},
{
"epoch": 6.65,
"learning_rate": 4.32375574378564e-05,
"loss": 1.3514,
"step": 8900
},
{
"epoch": 6.66,
"learning_rate": 4.322371699053314e-05,
"loss": 1.3805,
"step": 8910
},
{
"epoch": 6.66,
"learning_rate": 4.3209876543209875e-05,
"loss": 1.3812,
"step": 8920
},
{
"epoch": 6.67,
"learning_rate": 4.319603609588662e-05,
"loss": 1.3494,
"step": 8930
},
{
"epoch": 6.68,
"learning_rate": 4.3182195648563364e-05,
"loss": 1.3809,
"step": 8940
},
{
"epoch": 6.69,
"learning_rate": 4.316835520124011e-05,
"loss": 1.3981,
"step": 8950
},
{
"epoch": 6.69,
"learning_rate": 4.315451475391685e-05,
"loss": 1.3999,
"step": 8960
},
{
"epoch": 6.7,
"learning_rate": 4.314067430659359e-05,
"loss": 1.4749,
"step": 8970
},
{
"epoch": 6.71,
"learning_rate": 4.312683385927033e-05,
"loss": 1.3845,
"step": 8980
},
{
"epoch": 6.72,
"learning_rate": 4.311299341194708e-05,
"loss": 1.3419,
"step": 8990
},
{
"epoch": 6.72,
"learning_rate": 4.309915296462382e-05,
"loss": 1.3734,
"step": 9000
},
{
"epoch": 6.73,
"learning_rate": 4.308531251730056e-05,
"loss": 1.3875,
"step": 9010
},
{
"epoch": 6.74,
"learning_rate": 4.30714720699773e-05,
"loss": 1.3473,
"step": 9020
},
{
"epoch": 6.75,
"learning_rate": 4.3057631622654046e-05,
"loss": 1.3942,
"step": 9030
},
{
"epoch": 6.75,
"learning_rate": 4.304379117533079e-05,
"loss": 1.4169,
"step": 9040
},
{
"epoch": 6.76,
"learning_rate": 4.302995072800753e-05,
"loss": 1.4003,
"step": 9050
},
{
"epoch": 6.77,
"learning_rate": 4.301611028068427e-05,
"loss": 1.3686,
"step": 9060
},
{
"epoch": 6.78,
"learning_rate": 4.300226983336102e-05,
"loss": 1.3632,
"step": 9070
},
{
"epoch": 6.78,
"learning_rate": 4.298842938603776e-05,
"loss": 1.336,
"step": 9080
},
{
"epoch": 6.79,
"learning_rate": 4.29745889387145e-05,
"loss": 1.3884,
"step": 9090
},
{
"epoch": 6.8,
"learning_rate": 4.2960748491391245e-05,
"loss": 1.334,
"step": 9100
},
{
"epoch": 6.81,
"learning_rate": 4.294690804406798e-05,
"loss": 1.3552,
"step": 9110
},
{
"epoch": 6.81,
"learning_rate": 4.2933067596744734e-05,
"loss": 1.3961,
"step": 9120
},
{
"epoch": 6.82,
"learning_rate": 4.291922714942147e-05,
"loss": 1.35,
"step": 9130
},
{
"epoch": 6.83,
"learning_rate": 4.2905386702098216e-05,
"loss": 1.3617,
"step": 9140
},
{
"epoch": 6.84,
"learning_rate": 4.2891546254774954e-05,
"loss": 1.3952,
"step": 9150
},
{
"epoch": 6.84,
"learning_rate": 4.28777058074517e-05,
"loss": 1.3675,
"step": 9160
},
{
"epoch": 6.85,
"learning_rate": 4.2863865360128443e-05,
"loss": 1.3247,
"step": 9170
},
{
"epoch": 6.86,
"learning_rate": 4.285002491280518e-05,
"loss": 1.3541,
"step": 9180
},
{
"epoch": 6.87,
"learning_rate": 4.2836184465481926e-05,
"loss": 1.3712,
"step": 9190
},
{
"epoch": 6.87,
"learning_rate": 4.2822344018158664e-05,
"loss": 1.3916,
"step": 9200
},
{
"epoch": 6.88,
"learning_rate": 4.2808503570835415e-05,
"loss": 1.3535,
"step": 9210
},
{
"epoch": 6.89,
"learning_rate": 4.279466312351215e-05,
"loss": 1.363,
"step": 9220
},
{
"epoch": 6.9,
"learning_rate": 4.27808226761889e-05,
"loss": 1.4012,
"step": 9230
},
{
"epoch": 6.9,
"learning_rate": 4.2766982228865636e-05,
"loss": 1.3838,
"step": 9240
},
{
"epoch": 6.91,
"learning_rate": 4.275314178154238e-05,
"loss": 1.3937,
"step": 9250
},
{
"epoch": 6.92,
"learning_rate": 4.2739301334219125e-05,
"loss": 1.3694,
"step": 9260
},
{
"epoch": 6.93,
"learning_rate": 4.272546088689587e-05,
"loss": 1.3951,
"step": 9270
},
{
"epoch": 6.93,
"learning_rate": 4.271162043957261e-05,
"loss": 1.3196,
"step": 9280
},
{
"epoch": 6.94,
"learning_rate": 4.269777999224935e-05,
"loss": 1.3641,
"step": 9290
},
{
"epoch": 6.95,
"learning_rate": 4.26839395449261e-05,
"loss": 1.3593,
"step": 9300
},
{
"epoch": 6.96,
"learning_rate": 4.267009909760284e-05,
"loss": 1.3802,
"step": 9310
},
{
"epoch": 6.96,
"learning_rate": 4.265625865027958e-05,
"loss": 1.3727,
"step": 9320
},
{
"epoch": 6.97,
"learning_rate": 4.264241820295632e-05,
"loss": 1.3702,
"step": 9330
},
{
"epoch": 6.98,
"learning_rate": 4.262857775563306e-05,
"loss": 1.3803,
"step": 9340
},
{
"epoch": 6.99,
"learning_rate": 4.2614737308309806e-05,
"loss": 1.3847,
"step": 9350
},
{
"epoch": 6.99,
"learning_rate": 4.260089686098655e-05,
"loss": 1.3921,
"step": 9360
},
{
"epoch": 7.0,
"eval_accuracy": 0.5500814118388571,
"eval_loss": 1.0463857650756836,
"eval_runtime": 238.7158,
"eval_samples_per_second": 79.756,
"eval_steps_per_second": 2.493,
"step": 9369
},
{
"epoch": 7.0,
"learning_rate": 4.258705641366329e-05,
"loss": 1.3631,
"step": 9370
},
{
"epoch": 7.01,
"learning_rate": 4.2573215966340034e-05,
"loss": 1.4052,
"step": 9380
},
{
"epoch": 7.02,
"learning_rate": 4.255937551901678e-05,
"loss": 1.3432,
"step": 9390
},
{
"epoch": 7.02,
"learning_rate": 4.254553507169352e-05,
"loss": 1.3543,
"step": 9400
},
{
"epoch": 7.03,
"learning_rate": 4.253169462437026e-05,
"loss": 1.3618,
"step": 9410
},
{
"epoch": 7.04,
"learning_rate": 4.2517854177047005e-05,
"loss": 1.3862,
"step": 9420
},
{
"epoch": 7.05,
"learning_rate": 4.250401372972374e-05,
"loss": 1.4113,
"step": 9430
},
{
"epoch": 7.05,
"learning_rate": 4.2490173282400495e-05,
"loss": 1.3998,
"step": 9440
},
{
"epoch": 7.06,
"learning_rate": 4.247633283507723e-05,
"loss": 1.3359,
"step": 9450
},
{
"epoch": 7.07,
"learning_rate": 4.246249238775397e-05,
"loss": 1.3383,
"step": 9460
},
{
"epoch": 7.08,
"learning_rate": 4.2448651940430715e-05,
"loss": 1.384,
"step": 9470
},
{
"epoch": 7.08,
"learning_rate": 4.243481149310746e-05,
"loss": 1.383,
"step": 9480
},
{
"epoch": 7.09,
"learning_rate": 4.2420971045784204e-05,
"loss": 1.3548,
"step": 9490
},
{
"epoch": 7.1,
"learning_rate": 4.240713059846094e-05,
"loss": 1.3864,
"step": 9500
},
{
"epoch": 7.1,
"learning_rate": 4.239329015113769e-05,
"loss": 1.3947,
"step": 9510
},
{
"epoch": 7.11,
"learning_rate": 4.2379449703814425e-05,
"loss": 1.4068,
"step": 9520
},
{
"epoch": 7.12,
"learning_rate": 4.2365609256491176e-05,
"loss": 1.3726,
"step": 9530
},
{
"epoch": 7.13,
"learning_rate": 4.2351768809167914e-05,
"loss": 1.3707,
"step": 9540
},
{
"epoch": 7.13,
"learning_rate": 4.233792836184466e-05,
"loss": 1.3528,
"step": 9550
},
{
"epoch": 7.14,
"learning_rate": 4.2324087914521396e-05,
"loss": 1.3301,
"step": 9560
},
{
"epoch": 7.15,
"learning_rate": 4.231024746719814e-05,
"loss": 1.3843,
"step": 9570
},
{
"epoch": 7.16,
"learning_rate": 4.2296407019874886e-05,
"loss": 1.4158,
"step": 9580
},
{
"epoch": 7.16,
"learning_rate": 4.228256657255163e-05,
"loss": 1.4,
"step": 9590
},
{
"epoch": 7.17,
"learning_rate": 4.226872612522837e-05,
"loss": 1.3379,
"step": 9600
},
{
"epoch": 7.18,
"learning_rate": 4.2254885677905106e-05,
"loss": 1.3837,
"step": 9610
},
{
"epoch": 7.19,
"learning_rate": 4.224104523058186e-05,
"loss": 1.3532,
"step": 9620
},
{
"epoch": 7.19,
"learning_rate": 4.2227204783258595e-05,
"loss": 1.3943,
"step": 9630
},
{
"epoch": 7.2,
"learning_rate": 4.221336433593534e-05,
"loss": 1.3243,
"step": 9640
},
{
"epoch": 7.21,
"learning_rate": 4.219952388861208e-05,
"loss": 1.3824,
"step": 9650
},
{
"epoch": 7.22,
"learning_rate": 4.218568344128882e-05,
"loss": 1.3527,
"step": 9660
},
{
"epoch": 7.22,
"learning_rate": 4.217184299396557e-05,
"loss": 1.4035,
"step": 9670
},
{
"epoch": 7.23,
"learning_rate": 4.215800254664231e-05,
"loss": 1.3533,
"step": 9680
},
{
"epoch": 7.24,
"learning_rate": 4.214416209931905e-05,
"loss": 1.383,
"step": 9690
},
{
"epoch": 7.25,
"learning_rate": 4.2130321651995794e-05,
"loss": 1.3635,
"step": 9700
},
{
"epoch": 7.25,
"learning_rate": 4.211648120467254e-05,
"loss": 1.3854,
"step": 9710
},
{
"epoch": 7.26,
"learning_rate": 4.2102640757349284e-05,
"loss": 1.3541,
"step": 9720
},
{
"epoch": 7.27,
"learning_rate": 4.208880031002602e-05,
"loss": 1.3731,
"step": 9730
},
{
"epoch": 7.28,
"learning_rate": 4.207495986270276e-05,
"loss": 1.354,
"step": 9740
},
{
"epoch": 7.28,
"learning_rate": 4.206111941537951e-05,
"loss": 1.3705,
"step": 9750
},
{
"epoch": 7.29,
"learning_rate": 4.204727896805625e-05,
"loss": 1.3463,
"step": 9760
},
{
"epoch": 7.3,
"learning_rate": 4.203343852073299e-05,
"loss": 1.3523,
"step": 9770
},
{
"epoch": 7.31,
"learning_rate": 4.201959807340973e-05,
"loss": 1.4043,
"step": 9780
},
{
"epoch": 7.31,
"learning_rate": 4.2005757626086476e-05,
"loss": 1.3709,
"step": 9790
},
{
"epoch": 7.32,
"learning_rate": 4.199191717876322e-05,
"loss": 1.3228,
"step": 9800
},
{
"epoch": 7.33,
"learning_rate": 4.1978076731439965e-05,
"loss": 1.3825,
"step": 9810
},
{
"epoch": 7.34,
"learning_rate": 4.19642362841167e-05,
"loss": 1.3569,
"step": 9820
},
{
"epoch": 7.34,
"learning_rate": 4.195039583679345e-05,
"loss": 1.3689,
"step": 9830
},
{
"epoch": 7.35,
"learning_rate": 4.193655538947019e-05,
"loss": 1.3506,
"step": 9840
},
{
"epoch": 7.36,
"learning_rate": 4.192271494214694e-05,
"loss": 1.373,
"step": 9850
},
{
"epoch": 7.37,
"learning_rate": 4.1908874494823675e-05,
"loss": 1.4032,
"step": 9860
},
{
"epoch": 7.37,
"learning_rate": 4.189503404750042e-05,
"loss": 1.3258,
"step": 9870
},
{
"epoch": 7.38,
"learning_rate": 4.188119360017716e-05,
"loss": 1.3326,
"step": 9880
},
{
"epoch": 7.39,
"learning_rate": 4.18673531528539e-05,
"loss": 1.3693,
"step": 9890
},
{
"epoch": 7.4,
"learning_rate": 4.1853512705530646e-05,
"loss": 1.3751,
"step": 9900
},
{
"epoch": 7.4,
"learning_rate": 4.1839672258207384e-05,
"loss": 1.397,
"step": 9910
},
{
"epoch": 7.41,
"learning_rate": 4.182583181088413e-05,
"loss": 1.3394,
"step": 9920
},
{
"epoch": 7.42,
"learning_rate": 4.1811991363560874e-05,
"loss": 1.3735,
"step": 9930
},
{
"epoch": 7.43,
"learning_rate": 4.179815091623762e-05,
"loss": 1.3203,
"step": 9940
},
{
"epoch": 7.43,
"learning_rate": 4.1784310468914356e-05,
"loss": 1.3944,
"step": 9950
},
{
"epoch": 7.44,
"learning_rate": 4.17704700215911e-05,
"loss": 1.3931,
"step": 9960
},
{
"epoch": 7.45,
"learning_rate": 4.175662957426784e-05,
"loss": 1.3682,
"step": 9970
},
{
"epoch": 7.46,
"learning_rate": 4.174278912694459e-05,
"loss": 1.3836,
"step": 9980
},
{
"epoch": 7.46,
"learning_rate": 4.172894867962133e-05,
"loss": 1.4164,
"step": 9990
},
{
"epoch": 7.47,
"learning_rate": 4.171510823229807e-05,
"loss": 1.3469,
"step": 10000
},
{
"epoch": 7.48,
"learning_rate": 4.170126778497481e-05,
"loss": 1.3649,
"step": 10010
},
{
"epoch": 7.49,
"learning_rate": 4.1687427337651555e-05,
"loss": 1.3513,
"step": 10020
},
{
"epoch": 7.49,
"learning_rate": 4.16735868903283e-05,
"loss": 1.3732,
"step": 10030
},
{
"epoch": 7.5,
"learning_rate": 4.165974644300504e-05,
"loss": 1.3582,
"step": 10040
},
{
"epoch": 7.51,
"learning_rate": 4.164590599568178e-05,
"loss": 1.3946,
"step": 10050
},
{
"epoch": 7.52,
"learning_rate": 4.163206554835852e-05,
"loss": 1.3411,
"step": 10060
},
{
"epoch": 7.52,
"learning_rate": 4.161822510103527e-05,
"loss": 1.3123,
"step": 10070
},
{
"epoch": 7.53,
"learning_rate": 4.160438465371201e-05,
"loss": 1.3227,
"step": 10080
},
{
"epoch": 7.54,
"learning_rate": 4.1590544206388754e-05,
"loss": 1.3962,
"step": 10090
},
{
"epoch": 7.55,
"learning_rate": 4.157670375906549e-05,
"loss": 1.3891,
"step": 10100
},
{
"epoch": 7.55,
"learning_rate": 4.1562863311742237e-05,
"loss": 1.3614,
"step": 10110
},
{
"epoch": 7.56,
"learning_rate": 4.154902286441898e-05,
"loss": 1.3468,
"step": 10120
},
{
"epoch": 7.57,
"learning_rate": 4.1535182417095726e-05,
"loss": 1.3759,
"step": 10130
},
{
"epoch": 7.58,
"learning_rate": 4.1521341969772464e-05,
"loss": 1.352,
"step": 10140
},
{
"epoch": 7.58,
"learning_rate": 4.150750152244921e-05,
"loss": 1.3357,
"step": 10150
},
{
"epoch": 7.59,
"learning_rate": 4.149366107512595e-05,
"loss": 1.362,
"step": 10160
},
{
"epoch": 7.6,
"learning_rate": 4.147982062780269e-05,
"loss": 1.3648,
"step": 10170
},
{
"epoch": 7.61,
"learning_rate": 4.1465980180479435e-05,
"loss": 1.3725,
"step": 10180
},
{
"epoch": 7.61,
"learning_rate": 4.145213973315617e-05,
"loss": 1.3633,
"step": 10190
},
{
"epoch": 7.62,
"learning_rate": 4.143829928583292e-05,
"loss": 1.3625,
"step": 10200
},
{
"epoch": 7.63,
"learning_rate": 4.142445883850966e-05,
"loss": 1.4001,
"step": 10210
},
{
"epoch": 7.64,
"learning_rate": 4.141061839118641e-05,
"loss": 1.4015,
"step": 10220
},
{
"epoch": 7.64,
"learning_rate": 4.1396777943863145e-05,
"loss": 1.3823,
"step": 10230
},
{
"epoch": 7.65,
"learning_rate": 4.138293749653989e-05,
"loss": 1.3884,
"step": 10240
},
{
"epoch": 7.66,
"learning_rate": 4.1369097049216634e-05,
"loss": 1.3843,
"step": 10250
},
{
"epoch": 7.67,
"learning_rate": 4.135525660189338e-05,
"loss": 1.3666,
"step": 10260
},
{
"epoch": 7.67,
"learning_rate": 4.134141615457012e-05,
"loss": 1.4094,
"step": 10270
},
{
"epoch": 7.68,
"learning_rate": 4.132757570724686e-05,
"loss": 1.3444,
"step": 10280
},
{
"epoch": 7.69,
"learning_rate": 4.13137352599236e-05,
"loss": 1.3351,
"step": 10290
},
{
"epoch": 7.7,
"learning_rate": 4.1299894812600344e-05,
"loss": 1.3515,
"step": 10300
},
{
"epoch": 7.7,
"learning_rate": 4.128605436527709e-05,
"loss": 1.3879,
"step": 10310
},
{
"epoch": 7.71,
"learning_rate": 4.1272213917953827e-05,
"loss": 1.3308,
"step": 10320
},
{
"epoch": 7.72,
"learning_rate": 4.125837347063057e-05,
"loss": 1.3668,
"step": 10330
},
{
"epoch": 7.73,
"learning_rate": 4.1244533023307316e-05,
"loss": 1.3326,
"step": 10340
},
{
"epoch": 7.73,
"learning_rate": 4.123069257598406e-05,
"loss": 1.3629,
"step": 10350
},
{
"epoch": 7.74,
"learning_rate": 4.12168521286608e-05,
"loss": 1.3731,
"step": 10360
},
{
"epoch": 7.75,
"learning_rate": 4.120301168133754e-05,
"loss": 1.3608,
"step": 10370
},
{
"epoch": 7.75,
"learning_rate": 4.118917123401429e-05,
"loss": 1.3526,
"step": 10380
},
{
"epoch": 7.76,
"learning_rate": 4.117533078669103e-05,
"loss": 1.3499,
"step": 10390
},
{
"epoch": 7.77,
"learning_rate": 4.116149033936777e-05,
"loss": 1.3668,
"step": 10400
},
{
"epoch": 7.78,
"learning_rate": 4.1147649892044515e-05,
"loss": 1.3599,
"step": 10410
},
{
"epoch": 7.78,
"learning_rate": 4.113380944472125e-05,
"loss": 1.3688,
"step": 10420
},
{
"epoch": 7.79,
"learning_rate": 4.1119968997398e-05,
"loss": 1.3629,
"step": 10430
},
{
"epoch": 7.8,
"learning_rate": 4.110612855007474e-05,
"loss": 1.3202,
"step": 10440
},
{
"epoch": 7.81,
"learning_rate": 4.109228810275148e-05,
"loss": 1.3327,
"step": 10450
},
{
"epoch": 7.81,
"learning_rate": 4.1078447655428224e-05,
"loss": 1.3772,
"step": 10460
},
{
"epoch": 7.82,
"learning_rate": 4.106460720810497e-05,
"loss": 1.3614,
"step": 10470
},
{
"epoch": 7.83,
"learning_rate": 4.1050766760781714e-05,
"loss": 1.3987,
"step": 10480
},
{
"epoch": 7.84,
"learning_rate": 4.103692631345845e-05,
"loss": 1.3334,
"step": 10490
},
{
"epoch": 7.84,
"learning_rate": 4.1023085866135196e-05,
"loss": 1.3446,
"step": 10500
},
{
"epoch": 7.85,
"learning_rate": 4.1009245418811934e-05,
"loss": 1.3537,
"step": 10510
},
{
"epoch": 7.86,
"learning_rate": 4.0995404971488686e-05,
"loss": 1.3697,
"step": 10520
},
{
"epoch": 7.87,
"learning_rate": 4.0981564524165423e-05,
"loss": 1.3773,
"step": 10530
},
{
"epoch": 7.87,
"learning_rate": 4.096772407684217e-05,
"loss": 1.3465,
"step": 10540
},
{
"epoch": 7.88,
"learning_rate": 4.0953883629518906e-05,
"loss": 1.3714,
"step": 10550
},
{
"epoch": 7.89,
"learning_rate": 4.094004318219565e-05,
"loss": 1.3629,
"step": 10560
},
{
"epoch": 7.9,
"learning_rate": 4.0926202734872395e-05,
"loss": 1.3712,
"step": 10570
},
{
"epoch": 7.9,
"learning_rate": 4.091236228754913e-05,
"loss": 1.359,
"step": 10580
},
{
"epoch": 7.91,
"learning_rate": 4.089852184022588e-05,
"loss": 1.3225,
"step": 10590
},
{
"epoch": 7.92,
"learning_rate": 4.0884681392902616e-05,
"loss": 1.3376,
"step": 10600
},
{
"epoch": 7.93,
"learning_rate": 4.087084094557937e-05,
"loss": 1.3752,
"step": 10610
},
{
"epoch": 7.93,
"learning_rate": 4.0857000498256105e-05,
"loss": 1.3767,
"step": 10620
},
{
"epoch": 7.94,
"learning_rate": 4.084316005093285e-05,
"loss": 1.3432,
"step": 10630
},
{
"epoch": 7.95,
"learning_rate": 4.082931960360959e-05,
"loss": 1.3792,
"step": 10640
},
{
"epoch": 7.96,
"learning_rate": 4.081547915628633e-05,
"loss": 1.3713,
"step": 10650
},
{
"epoch": 7.96,
"learning_rate": 4.080163870896308e-05,
"loss": 1.3621,
"step": 10660
},
{
"epoch": 7.97,
"learning_rate": 4.078779826163982e-05,
"loss": 1.4097,
"step": 10670
},
{
"epoch": 7.98,
"learning_rate": 4.077395781431656e-05,
"loss": 1.3484,
"step": 10680
},
{
"epoch": 7.99,
"learning_rate": 4.0760117366993304e-05,
"loss": 1.3528,
"step": 10690
},
{
"epoch": 7.99,
"learning_rate": 4.074627691967005e-05,
"loss": 1.3812,
"step": 10700
},
{
"epoch": 8.0,
"eval_accuracy": 0.5491359840327749,
"eval_loss": 1.046125054359436,
"eval_runtime": 235.6379,
"eval_samples_per_second": 80.798,
"eval_steps_per_second": 2.525,
"step": 10708
},
{
"epoch": 8.0,
"learning_rate": 4.0732436472346786e-05,
"loss": 1.4003,
"step": 10710
},
{
"epoch": 8.01,
"learning_rate": 4.071859602502353e-05,
"loss": 1.3651,
"step": 10720
},
{
"epoch": 8.02,
"learning_rate": 4.070475557770027e-05,
"loss": 1.3648,
"step": 10730
},
{
"epoch": 8.02,
"learning_rate": 4.0690915130377013e-05,
"loss": 1.3353,
"step": 10740
},
{
"epoch": 8.03,
"learning_rate": 4.067707468305376e-05,
"loss": 1.3074,
"step": 10750
},
{
"epoch": 8.04,
"learning_rate": 4.06632342357305e-05,
"loss": 1.3737,
"step": 10760
},
{
"epoch": 8.05,
"learning_rate": 4.064939378840724e-05,
"loss": 1.3888,
"step": 10770
},
{
"epoch": 8.05,
"learning_rate": 4.0635553341083985e-05,
"loss": 1.3675,
"step": 10780
},
{
"epoch": 8.06,
"learning_rate": 4.062171289376073e-05,
"loss": 1.3628,
"step": 10790
},
{
"epoch": 8.07,
"learning_rate": 4.0607872446437475e-05,
"loss": 1.3708,
"step": 10800
},
{
"epoch": 8.08,
"learning_rate": 4.059403199911421e-05,
"loss": 1.3422,
"step": 10810
},
{
"epoch": 8.08,
"learning_rate": 4.058019155179096e-05,
"loss": 1.346,
"step": 10820
},
{
"epoch": 8.09,
"learning_rate": 4.0566351104467695e-05,
"loss": 1.3538,
"step": 10830
},
{
"epoch": 8.1,
"learning_rate": 4.0552510657144446e-05,
"loss": 1.3424,
"step": 10840
},
{
"epoch": 8.11,
"learning_rate": 4.0538670209821184e-05,
"loss": 1.3276,
"step": 10850
},
{
"epoch": 8.11,
"learning_rate": 4.052482976249792e-05,
"loss": 1.3365,
"step": 10860
},
{
"epoch": 8.12,
"learning_rate": 4.051098931517467e-05,
"loss": 1.3448,
"step": 10870
},
{
"epoch": 8.13,
"learning_rate": 4.049714886785141e-05,
"loss": 1.36,
"step": 10880
},
{
"epoch": 8.14,
"learning_rate": 4.0483308420528156e-05,
"loss": 1.3727,
"step": 10890
},
{
"epoch": 8.14,
"learning_rate": 4.0469467973204894e-05,
"loss": 1.3842,
"step": 10900
},
{
"epoch": 8.15,
"learning_rate": 4.045562752588164e-05,
"loss": 1.3901,
"step": 10910
},
{
"epoch": 8.16,
"learning_rate": 4.0441787078558376e-05,
"loss": 1.3672,
"step": 10920
},
{
"epoch": 8.17,
"learning_rate": 4.042794663123513e-05,
"loss": 1.3544,
"step": 10930
},
{
"epoch": 8.17,
"learning_rate": 4.0414106183911866e-05,
"loss": 1.3048,
"step": 10940
},
{
"epoch": 8.18,
"learning_rate": 4.040026573658861e-05,
"loss": 1.3685,
"step": 10950
},
{
"epoch": 8.19,
"learning_rate": 4.038642528926535e-05,
"loss": 1.3694,
"step": 10960
},
{
"epoch": 8.2,
"learning_rate": 4.037258484194209e-05,
"loss": 1.3407,
"step": 10970
},
{
"epoch": 8.2,
"learning_rate": 4.035874439461884e-05,
"loss": 1.3795,
"step": 10980
},
{
"epoch": 8.21,
"learning_rate": 4.0344903947295575e-05,
"loss": 1.3514,
"step": 10990
},
{
"epoch": 8.22,
"learning_rate": 4.033106349997232e-05,
"loss": 1.402,
"step": 11000
},
{
"epoch": 8.23,
"learning_rate": 4.0317223052649065e-05,
"loss": 1.3242,
"step": 11010
},
{
"epoch": 8.23,
"learning_rate": 4.030338260532581e-05,
"loss": 1.3351,
"step": 11020
},
{
"epoch": 8.24,
"learning_rate": 4.028954215800255e-05,
"loss": 1.3768,
"step": 11030
},
{
"epoch": 8.25,
"learning_rate": 4.027570171067929e-05,
"loss": 1.391,
"step": 11040
},
{
"epoch": 8.26,
"learning_rate": 4.026186126335603e-05,
"loss": 1.3512,
"step": 11050
},
{
"epoch": 8.26,
"learning_rate": 4.0248020816032774e-05,
"loss": 1.3876,
"step": 11060
},
{
"epoch": 8.27,
"learning_rate": 4.023418036870952e-05,
"loss": 1.3611,
"step": 11070
},
{
"epoch": 8.28,
"learning_rate": 4.0220339921386264e-05,
"loss": 1.3663,
"step": 11080
},
{
"epoch": 8.29,
"learning_rate": 4.0206499474063e-05,
"loss": 1.4226,
"step": 11090
},
{
"epoch": 8.29,
"learning_rate": 4.0192659026739746e-05,
"loss": 1.3504,
"step": 11100
},
{
"epoch": 8.3,
"learning_rate": 4.017881857941649e-05,
"loss": 1.4109,
"step": 11110
},
{
"epoch": 8.31,
"learning_rate": 4.0164978132093235e-05,
"loss": 1.3609,
"step": 11120
},
{
"epoch": 8.32,
"learning_rate": 4.015113768476997e-05,
"loss": 1.373,
"step": 11130
},
{
"epoch": 8.32,
"learning_rate": 4.013729723744671e-05,
"loss": 1.3748,
"step": 11140
},
{
"epoch": 8.33,
"learning_rate": 4.012345679012346e-05,
"loss": 1.3699,
"step": 11150
},
{
"epoch": 8.34,
"learning_rate": 4.01096163428002e-05,
"loss": 1.3408,
"step": 11160
},
{
"epoch": 8.35,
"learning_rate": 4.0095775895476945e-05,
"loss": 1.2931,
"step": 11170
},
{
"epoch": 8.35,
"learning_rate": 4.008193544815368e-05,
"loss": 1.3726,
"step": 11180
},
{
"epoch": 8.36,
"learning_rate": 4.006809500083043e-05,
"loss": 1.3375,
"step": 11190
},
{
"epoch": 8.37,
"learning_rate": 4.005425455350717e-05,
"loss": 1.3566,
"step": 11200
},
{
"epoch": 8.38,
"learning_rate": 4.004041410618392e-05,
"loss": 1.3886,
"step": 11210
},
{
"epoch": 8.38,
"learning_rate": 4.0026573658860655e-05,
"loss": 1.3248,
"step": 11220
},
{
"epoch": 8.39,
"learning_rate": 4.00127332115374e-05,
"loss": 1.3375,
"step": 11230
},
{
"epoch": 8.4,
"learning_rate": 3.9998892764214144e-05,
"loss": 1.3406,
"step": 11240
},
{
"epoch": 8.4,
"learning_rate": 3.998505231689089e-05,
"loss": 1.3334,
"step": 11250
},
{
"epoch": 8.41,
"learning_rate": 3.9971211869567626e-05,
"loss": 1.3615,
"step": 11260
},
{
"epoch": 8.42,
"learning_rate": 3.9957371422244364e-05,
"loss": 1.3401,
"step": 11270
},
{
"epoch": 8.43,
"learning_rate": 3.994353097492111e-05,
"loss": 1.3233,
"step": 11280
},
{
"epoch": 8.43,
"learning_rate": 3.9929690527597854e-05,
"loss": 1.3211,
"step": 11290
},
{
"epoch": 8.44,
"learning_rate": 3.99158500802746e-05,
"loss": 1.3545,
"step": 11300
},
{
"epoch": 8.45,
"learning_rate": 3.9902009632951336e-05,
"loss": 1.3432,
"step": 11310
},
{
"epoch": 8.46,
"learning_rate": 3.988816918562808e-05,
"loss": 1.355,
"step": 11320
},
{
"epoch": 8.46,
"learning_rate": 3.9874328738304825e-05,
"loss": 1.3081,
"step": 11330
},
{
"epoch": 8.47,
"learning_rate": 3.986048829098157e-05,
"loss": 1.3432,
"step": 11340
},
{
"epoch": 8.48,
"learning_rate": 3.984664784365831e-05,
"loss": 1.3498,
"step": 11350
},
{
"epoch": 8.49,
"learning_rate": 3.983280739633505e-05,
"loss": 1.3674,
"step": 11360
},
{
"epoch": 8.49,
"learning_rate": 3.981896694901179e-05,
"loss": 1.37,
"step": 11370
},
{
"epoch": 8.5,
"learning_rate": 3.980512650168854e-05,
"loss": 1.3497,
"step": 11380
},
{
"epoch": 8.51,
"learning_rate": 3.979128605436528e-05,
"loss": 1.3654,
"step": 11390
},
{
"epoch": 8.52,
"learning_rate": 3.9777445607042024e-05,
"loss": 1.3485,
"step": 11400
},
{
"epoch": 8.52,
"learning_rate": 3.976360515971876e-05,
"loss": 1.3643,
"step": 11410
},
{
"epoch": 8.53,
"learning_rate": 3.974976471239551e-05,
"loss": 1.371,
"step": 11420
},
{
"epoch": 8.54,
"learning_rate": 3.973592426507225e-05,
"loss": 1.3634,
"step": 11430
},
{
"epoch": 8.55,
"learning_rate": 3.972208381774899e-05,
"loss": 1.318,
"step": 11440
},
{
"epoch": 8.55,
"learning_rate": 3.9708243370425734e-05,
"loss": 1.386,
"step": 11450
},
{
"epoch": 8.56,
"learning_rate": 3.969440292310247e-05,
"loss": 1.3363,
"step": 11460
},
{
"epoch": 8.57,
"learning_rate": 3.968056247577922e-05,
"loss": 1.3063,
"step": 11470
},
{
"epoch": 8.58,
"learning_rate": 3.966672202845596e-05,
"loss": 1.3467,
"step": 11480
},
{
"epoch": 8.58,
"learning_rate": 3.9652881581132706e-05,
"loss": 1.3795,
"step": 11490
},
{
"epoch": 8.59,
"learning_rate": 3.9639041133809444e-05,
"loss": 1.3711,
"step": 11500
},
{
"epoch": 8.6,
"learning_rate": 3.962520068648619e-05,
"loss": 1.3348,
"step": 11510
},
{
"epoch": 8.61,
"learning_rate": 3.961136023916293e-05,
"loss": 1.3518,
"step": 11520
},
{
"epoch": 8.61,
"learning_rate": 3.959751979183968e-05,
"loss": 1.3021,
"step": 11530
},
{
"epoch": 8.62,
"learning_rate": 3.9583679344516415e-05,
"loss": 1.3551,
"step": 11540
},
{
"epoch": 8.63,
"learning_rate": 3.956983889719315e-05,
"loss": 1.3393,
"step": 11550
},
{
"epoch": 8.64,
"learning_rate": 3.9555998449869905e-05,
"loss": 1.3846,
"step": 11560
},
{
"epoch": 8.64,
"learning_rate": 3.954215800254664e-05,
"loss": 1.3375,
"step": 11570
},
{
"epoch": 8.65,
"learning_rate": 3.952831755522339e-05,
"loss": 1.3556,
"step": 11580
},
{
"epoch": 8.66,
"learning_rate": 3.9514477107900125e-05,
"loss": 1.3888,
"step": 11590
},
{
"epoch": 8.67,
"learning_rate": 3.950063666057687e-05,
"loss": 1.3692,
"step": 11600
},
{
"epoch": 8.67,
"learning_rate": 3.9486796213253614e-05,
"loss": 1.3883,
"step": 11610
},
{
"epoch": 8.68,
"learning_rate": 3.947295576593036e-05,
"loss": 1.3373,
"step": 11620
},
{
"epoch": 8.69,
"learning_rate": 3.94591153186071e-05,
"loss": 1.3969,
"step": 11630
},
{
"epoch": 8.7,
"learning_rate": 3.944527487128384e-05,
"loss": 1.3781,
"step": 11640
},
{
"epoch": 8.7,
"learning_rate": 3.9431434423960586e-05,
"loss": 1.3409,
"step": 11650
},
{
"epoch": 8.71,
"learning_rate": 3.941759397663733e-05,
"loss": 1.3275,
"step": 11660
},
{
"epoch": 8.72,
"learning_rate": 3.940375352931407e-05,
"loss": 1.3082,
"step": 11670
},
{
"epoch": 8.73,
"learning_rate": 3.9389913081990807e-05,
"loss": 1.3243,
"step": 11680
},
{
"epoch": 8.73,
"learning_rate": 3.937607263466756e-05,
"loss": 1.3616,
"step": 11690
},
{
"epoch": 8.74,
"learning_rate": 3.9362232187344296e-05,
"loss": 1.3628,
"step": 11700
},
{
"epoch": 8.75,
"learning_rate": 3.934839174002104e-05,
"loss": 1.3604,
"step": 11710
},
{
"epoch": 8.76,
"learning_rate": 3.933455129269778e-05,
"loss": 1.3174,
"step": 11720
},
{
"epoch": 8.76,
"learning_rate": 3.932071084537452e-05,
"loss": 1.3762,
"step": 11730
},
{
"epoch": 8.77,
"learning_rate": 3.930687039805127e-05,
"loss": 1.3293,
"step": 11740
},
{
"epoch": 8.78,
"learning_rate": 3.929302995072801e-05,
"loss": 1.3215,
"step": 11750
},
{
"epoch": 8.79,
"learning_rate": 3.927918950340475e-05,
"loss": 1.3857,
"step": 11760
},
{
"epoch": 8.79,
"learning_rate": 3.9265349056081495e-05,
"loss": 1.349,
"step": 11770
},
{
"epoch": 8.8,
"learning_rate": 3.925150860875824e-05,
"loss": 1.3374,
"step": 11780
},
{
"epoch": 8.81,
"learning_rate": 3.9237668161434984e-05,
"loss": 1.3493,
"step": 11790
},
{
"epoch": 8.82,
"learning_rate": 3.922382771411172e-05,
"loss": 1.3645,
"step": 11800
},
{
"epoch": 8.82,
"learning_rate": 3.9209987266788467e-05,
"loss": 1.3549,
"step": 11810
},
{
"epoch": 8.83,
"learning_rate": 3.9196146819465204e-05,
"loss": 1.3527,
"step": 11820
},
{
"epoch": 8.84,
"learning_rate": 3.918230637214195e-05,
"loss": 1.3417,
"step": 11830
},
{
"epoch": 8.85,
"learning_rate": 3.9168465924818694e-05,
"loss": 1.3968,
"step": 11840
},
{
"epoch": 8.85,
"learning_rate": 3.915462547749543e-05,
"loss": 1.32,
"step": 11850
},
{
"epoch": 8.86,
"learning_rate": 3.9140785030172176e-05,
"loss": 1.3736,
"step": 11860
},
{
"epoch": 8.87,
"learning_rate": 3.912694458284892e-05,
"loss": 1.3285,
"step": 11870
},
{
"epoch": 8.88,
"learning_rate": 3.9113104135525665e-05,
"loss": 1.3523,
"step": 11880
},
{
"epoch": 8.88,
"learning_rate": 3.90992636882024e-05,
"loss": 1.3555,
"step": 11890
},
{
"epoch": 8.89,
"learning_rate": 3.908542324087915e-05,
"loss": 1.3641,
"step": 11900
},
{
"epoch": 8.9,
"learning_rate": 3.9071582793555886e-05,
"loss": 1.3421,
"step": 11910
},
{
"epoch": 8.91,
"learning_rate": 3.905774234623264e-05,
"loss": 1.3296,
"step": 11920
},
{
"epoch": 8.91,
"learning_rate": 3.9043901898909375e-05,
"loss": 1.3141,
"step": 11930
},
{
"epoch": 8.92,
"learning_rate": 3.903006145158612e-05,
"loss": 1.353,
"step": 11940
},
{
"epoch": 8.93,
"learning_rate": 3.901622100426286e-05,
"loss": 1.3307,
"step": 11950
},
{
"epoch": 8.94,
"learning_rate": 3.90023805569396e-05,
"loss": 1.3382,
"step": 11960
},
{
"epoch": 8.94,
"learning_rate": 3.898854010961635e-05,
"loss": 1.3759,
"step": 11970
},
{
"epoch": 8.95,
"learning_rate": 3.8974699662293085e-05,
"loss": 1.3497,
"step": 11980
},
{
"epoch": 8.96,
"learning_rate": 3.896085921496983e-05,
"loss": 1.3662,
"step": 11990
},
{
"epoch": 8.97,
"learning_rate": 3.894701876764657e-05,
"loss": 1.3519,
"step": 12000
},
{
"epoch": 8.97,
"learning_rate": 3.893317832032332e-05,
"loss": 1.3312,
"step": 12010
},
{
"epoch": 8.98,
"learning_rate": 3.8919337873000057e-05,
"loss": 1.3139,
"step": 12020
},
{
"epoch": 8.99,
"learning_rate": 3.89054974256768e-05,
"loss": 1.3807,
"step": 12030
},
{
"epoch": 9.0,
"learning_rate": 3.889165697835354e-05,
"loss": 1.3494,
"step": 12040
},
{
"epoch": 9.0,
"eval_accuracy": 0.5486107463627291,
"eval_loss": 1.044506549835205,
"eval_runtime": 231.0991,
"eval_samples_per_second": 82.385,
"eval_steps_per_second": 2.575,
"step": 12046
},
{
"epoch": 9.0,
"learning_rate": 3.8877816531030284e-05,
"loss": 1.3352,
"step": 12050
},
{
"epoch": 9.01,
"learning_rate": 3.886397608370703e-05,
"loss": 1.3725,
"step": 12060
},
{
"epoch": 9.02,
"learning_rate": 3.885013563638377e-05,
"loss": 1.3775,
"step": 12070
},
{
"epoch": 9.03,
"learning_rate": 3.883629518906051e-05,
"loss": 1.3761,
"step": 12080
},
{
"epoch": 9.03,
"learning_rate": 3.8822454741737256e-05,
"loss": 1.3551,
"step": 12090
},
{
"epoch": 9.04,
"learning_rate": 3.8808614294414e-05,
"loss": 1.3271,
"step": 12100
},
{
"epoch": 9.05,
"learning_rate": 3.879477384709074e-05,
"loss": 1.3636,
"step": 12110
},
{
"epoch": 9.05,
"learning_rate": 3.878093339976748e-05,
"loss": 1.3671,
"step": 12120
},
{
"epoch": 9.06,
"learning_rate": 3.876709295244422e-05,
"loss": 1.3095,
"step": 12130
},
{
"epoch": 9.07,
"learning_rate": 3.8753252505120965e-05,
"loss": 1.3745,
"step": 12140
},
{
"epoch": 9.08,
"learning_rate": 3.873941205779771e-05,
"loss": 1.3583,
"step": 12150
},
{
"epoch": 9.08,
"learning_rate": 3.8725571610474454e-05,
"loss": 1.3512,
"step": 12160
},
{
"epoch": 9.09,
"learning_rate": 3.871173116315119e-05,
"loss": 1.2891,
"step": 12170
},
{
"epoch": 9.1,
"learning_rate": 3.869789071582794e-05,
"loss": 1.3562,
"step": 12180
},
{
"epoch": 9.11,
"learning_rate": 3.868405026850468e-05,
"loss": 1.2822,
"step": 12190
},
{
"epoch": 9.11,
"learning_rate": 3.8670209821181426e-05,
"loss": 1.3552,
"step": 12200
},
{
"epoch": 9.12,
"learning_rate": 3.8656369373858164e-05,
"loss": 1.3388,
"step": 12210
},
{
"epoch": 9.13,
"learning_rate": 3.864252892653491e-05,
"loss": 1.37,
"step": 12220
},
{
"epoch": 9.14,
"learning_rate": 3.862868847921165e-05,
"loss": 1.3886,
"step": 12230
},
{
"epoch": 9.14,
"learning_rate": 3.861484803188839e-05,
"loss": 1.3162,
"step": 12240
},
{
"epoch": 9.15,
"learning_rate": 3.8601007584565136e-05,
"loss": 1.3692,
"step": 12250
},
{
"epoch": 9.16,
"learning_rate": 3.8587167137241874e-05,
"loss": 1.3342,
"step": 12260
},
{
"epoch": 9.17,
"learning_rate": 3.857332668991862e-05,
"loss": 1.3694,
"step": 12270
},
{
"epoch": 9.17,
"learning_rate": 3.855948624259536e-05,
"loss": 1.3719,
"step": 12280
},
{
"epoch": 9.18,
"learning_rate": 3.854564579527211e-05,
"loss": 1.3865,
"step": 12290
},
{
"epoch": 9.19,
"learning_rate": 3.8531805347948846e-05,
"loss": 1.3152,
"step": 12300
},
{
"epoch": 9.2,
"learning_rate": 3.851796490062559e-05,
"loss": 1.3557,
"step": 12310
},
{
"epoch": 9.2,
"learning_rate": 3.8504124453302335e-05,
"loss": 1.3919,
"step": 12320
},
{
"epoch": 9.21,
"learning_rate": 3.849028400597908e-05,
"loss": 1.3474,
"step": 12330
},
{
"epoch": 9.22,
"learning_rate": 3.847644355865582e-05,
"loss": 1.3794,
"step": 12340
},
{
"epoch": 9.23,
"learning_rate": 3.846260311133256e-05,
"loss": 1.3272,
"step": 12350
},
{
"epoch": 9.23,
"learning_rate": 3.84487626640093e-05,
"loss": 1.3655,
"step": 12360
},
{
"epoch": 9.24,
"learning_rate": 3.8434922216686045e-05,
"loss": 1.334,
"step": 12370
},
{
"epoch": 9.25,
"learning_rate": 3.842108176936279e-05,
"loss": 1.3127,
"step": 12380
},
{
"epoch": 9.26,
"learning_rate": 3.840724132203953e-05,
"loss": 1.3253,
"step": 12390
},
{
"epoch": 9.26,
"learning_rate": 3.839340087471627e-05,
"loss": 1.3466,
"step": 12400
},
{
"epoch": 9.27,
"learning_rate": 3.8379560427393016e-05,
"loss": 1.3681,
"step": 12410
},
{
"epoch": 9.28,
"learning_rate": 3.836571998006976e-05,
"loss": 1.3236,
"step": 12420
},
{
"epoch": 9.29,
"learning_rate": 3.83518795327465e-05,
"loss": 1.3082,
"step": 12430
},
{
"epoch": 9.29,
"learning_rate": 3.8338039085423243e-05,
"loss": 1.3155,
"step": 12440
},
{
"epoch": 9.3,
"learning_rate": 3.832419863809998e-05,
"loss": 1.3815,
"step": 12450
},
{
"epoch": 9.31,
"learning_rate": 3.831035819077673e-05,
"loss": 1.3217,
"step": 12460
},
{
"epoch": 9.32,
"learning_rate": 3.829651774345347e-05,
"loss": 1.3239,
"step": 12470
},
{
"epoch": 9.32,
"learning_rate": 3.8282677296130215e-05,
"loss": 1.3819,
"step": 12480
},
{
"epoch": 9.33,
"learning_rate": 3.826883684880695e-05,
"loss": 1.3529,
"step": 12490
},
{
"epoch": 9.34,
"learning_rate": 3.82549964014837e-05,
"loss": 1.3617,
"step": 12500
},
{
"epoch": 9.35,
"learning_rate": 3.824115595416044e-05,
"loss": 1.3755,
"step": 12510
},
{
"epoch": 9.35,
"learning_rate": 3.822731550683718e-05,
"loss": 1.3193,
"step": 12520
},
{
"epoch": 9.36,
"learning_rate": 3.8213475059513925e-05,
"loss": 1.3589,
"step": 12530
},
{
"epoch": 9.37,
"learning_rate": 3.819963461219066e-05,
"loss": 1.3443,
"step": 12540
},
{
"epoch": 9.38,
"learning_rate": 3.8185794164867414e-05,
"loss": 1.3601,
"step": 12550
},
{
"epoch": 9.38,
"learning_rate": 3.817195371754415e-05,
"loss": 1.3522,
"step": 12560
},
{
"epoch": 9.39,
"learning_rate": 3.81581132702209e-05,
"loss": 1.3306,
"step": 12570
},
{
"epoch": 9.4,
"learning_rate": 3.8144272822897635e-05,
"loss": 1.3759,
"step": 12580
},
{
"epoch": 9.41,
"learning_rate": 3.813043237557438e-05,
"loss": 1.3465,
"step": 12590
},
{
"epoch": 9.41,
"learning_rate": 3.8116591928251124e-05,
"loss": 1.3654,
"step": 12600
},
{
"epoch": 9.42,
"learning_rate": 3.810275148092787e-05,
"loss": 1.3498,
"step": 12610
},
{
"epoch": 9.43,
"learning_rate": 3.8088911033604606e-05,
"loss": 1.3136,
"step": 12620
},
{
"epoch": 9.44,
"learning_rate": 3.807507058628135e-05,
"loss": 1.3299,
"step": 12630
},
{
"epoch": 9.44,
"learning_rate": 3.8061230138958096e-05,
"loss": 1.3503,
"step": 12640
},
{
"epoch": 9.45,
"learning_rate": 3.8047389691634834e-05,
"loss": 1.3553,
"step": 12650
},
{
"epoch": 9.46,
"learning_rate": 3.803354924431158e-05,
"loss": 1.3531,
"step": 12660
},
{
"epoch": 9.47,
"learning_rate": 3.8019708796988316e-05,
"loss": 1.3549,
"step": 12670
},
{
"epoch": 9.47,
"learning_rate": 3.800586834966506e-05,
"loss": 1.3648,
"step": 12680
},
{
"epoch": 9.48,
"learning_rate": 3.7992027902341805e-05,
"loss": 1.3138,
"step": 12690
},
{
"epoch": 9.49,
"learning_rate": 3.797818745501855e-05,
"loss": 1.3613,
"step": 12700
},
{
"epoch": 9.5,
"learning_rate": 3.796434700769529e-05,
"loss": 1.3787,
"step": 12710
},
{
"epoch": 9.5,
"learning_rate": 3.795050656037203e-05,
"loss": 1.3575,
"step": 12720
},
{
"epoch": 9.51,
"learning_rate": 3.793666611304878e-05,
"loss": 1.3604,
"step": 12730
},
{
"epoch": 9.52,
"learning_rate": 3.792282566572552e-05,
"loss": 1.3721,
"step": 12740
},
{
"epoch": 9.53,
"learning_rate": 3.790898521840226e-05,
"loss": 1.3346,
"step": 12750
},
{
"epoch": 9.53,
"learning_rate": 3.7895144771079004e-05,
"loss": 1.3464,
"step": 12760
},
{
"epoch": 9.54,
"learning_rate": 3.788130432375574e-05,
"loss": 1.2985,
"step": 12770
},
{
"epoch": 9.55,
"learning_rate": 3.7867463876432494e-05,
"loss": 1.3636,
"step": 12780
},
{
"epoch": 9.56,
"learning_rate": 3.785362342910923e-05,
"loss": 1.3328,
"step": 12790
},
{
"epoch": 9.56,
"learning_rate": 3.783978298178597e-05,
"loss": 1.3428,
"step": 12800
},
{
"epoch": 9.57,
"learning_rate": 3.7825942534462714e-05,
"loss": 1.3596,
"step": 12810
},
{
"epoch": 9.58,
"learning_rate": 3.781210208713946e-05,
"loss": 1.313,
"step": 12820
},
{
"epoch": 9.59,
"learning_rate": 3.77982616398162e-05,
"loss": 1.3831,
"step": 12830
},
{
"epoch": 9.59,
"learning_rate": 3.778442119249294e-05,
"loss": 1.3568,
"step": 12840
},
{
"epoch": 9.6,
"learning_rate": 3.7770580745169686e-05,
"loss": 1.3375,
"step": 12850
},
{
"epoch": 9.61,
"learning_rate": 3.7756740297846424e-05,
"loss": 1.3541,
"step": 12860
},
{
"epoch": 9.62,
"learning_rate": 3.7742899850523175e-05,
"loss": 1.3118,
"step": 12870
},
{
"epoch": 9.62,
"learning_rate": 3.772905940319991e-05,
"loss": 1.2975,
"step": 12880
},
{
"epoch": 9.63,
"learning_rate": 3.771521895587666e-05,
"loss": 1.3723,
"step": 12890
},
{
"epoch": 9.64,
"learning_rate": 3.7701378508553395e-05,
"loss": 1.3924,
"step": 12900
},
{
"epoch": 9.65,
"learning_rate": 3.768753806123014e-05,
"loss": 1.3664,
"step": 12910
},
{
"epoch": 9.65,
"learning_rate": 3.7673697613906885e-05,
"loss": 1.3274,
"step": 12920
},
{
"epoch": 9.66,
"learning_rate": 3.765985716658362e-05,
"loss": 1.3708,
"step": 12930
},
{
"epoch": 9.67,
"learning_rate": 3.764601671926037e-05,
"loss": 1.3257,
"step": 12940
},
{
"epoch": 9.68,
"learning_rate": 3.763217627193711e-05,
"loss": 1.3476,
"step": 12950
},
{
"epoch": 9.68,
"learning_rate": 3.7618335824613856e-05,
"loss": 1.3382,
"step": 12960
},
{
"epoch": 9.69,
"learning_rate": 3.7604495377290594e-05,
"loss": 1.2679,
"step": 12970
},
{
"epoch": 9.7,
"learning_rate": 3.759065492996734e-05,
"loss": 1.3314,
"step": 12980
},
{
"epoch": 9.7,
"learning_rate": 3.757681448264408e-05,
"loss": 1.3653,
"step": 12990
},
{
"epoch": 9.71,
"learning_rate": 3.756297403532082e-05,
"loss": 1.3493,
"step": 13000
},
{
"epoch": 9.72,
"learning_rate": 3.7549133587997566e-05,
"loss": 1.3177,
"step": 13010
},
{
"epoch": 9.73,
"learning_rate": 3.753529314067431e-05,
"loss": 1.366,
"step": 13020
},
{
"epoch": 9.73,
"learning_rate": 3.752145269335105e-05,
"loss": 1.3585,
"step": 13030
},
{
"epoch": 9.74,
"learning_rate": 3.750761224602779e-05,
"loss": 1.3038,
"step": 13040
},
{
"epoch": 9.75,
"learning_rate": 3.749377179870454e-05,
"loss": 1.3237,
"step": 13050
},
{
"epoch": 9.76,
"learning_rate": 3.747993135138128e-05,
"loss": 1.3358,
"step": 13060
},
{
"epoch": 9.76,
"learning_rate": 3.746609090405802e-05,
"loss": 1.3068,
"step": 13070
},
{
"epoch": 9.77,
"learning_rate": 3.745225045673476e-05,
"loss": 1.3664,
"step": 13080
},
{
"epoch": 9.78,
"learning_rate": 3.743841000941151e-05,
"loss": 1.3885,
"step": 13090
},
{
"epoch": 9.79,
"learning_rate": 3.742456956208825e-05,
"loss": 1.3254,
"step": 13100
},
{
"epoch": 9.79,
"learning_rate": 3.741072911476499e-05,
"loss": 1.3438,
"step": 13110
},
{
"epoch": 9.8,
"learning_rate": 3.739688866744173e-05,
"loss": 1.3471,
"step": 13120
},
{
"epoch": 9.81,
"learning_rate": 3.7383048220118475e-05,
"loss": 1.3303,
"step": 13130
},
{
"epoch": 9.82,
"learning_rate": 3.736920777279522e-05,
"loss": 1.3223,
"step": 13140
},
{
"epoch": 9.82,
"learning_rate": 3.7355367325471964e-05,
"loss": 1.3675,
"step": 13150
},
{
"epoch": 9.83,
"learning_rate": 3.73415268781487e-05,
"loss": 1.3661,
"step": 13160
},
{
"epoch": 9.84,
"learning_rate": 3.7327686430825447e-05,
"loss": 1.3076,
"step": 13170
},
{
"epoch": 9.85,
"learning_rate": 3.731384598350219e-05,
"loss": 1.3231,
"step": 13180
},
{
"epoch": 9.85,
"learning_rate": 3.7300005536178936e-05,
"loss": 1.3405,
"step": 13190
},
{
"epoch": 9.86,
"learning_rate": 3.7286165088855674e-05,
"loss": 1.3288,
"step": 13200
},
{
"epoch": 9.87,
"learning_rate": 3.727232464153241e-05,
"loss": 1.3377,
"step": 13210
},
{
"epoch": 9.88,
"learning_rate": 3.7258484194209156e-05,
"loss": 1.3228,
"step": 13220
},
{
"epoch": 9.88,
"learning_rate": 3.72446437468859e-05,
"loss": 1.3206,
"step": 13230
},
{
"epoch": 9.89,
"learning_rate": 3.7230803299562645e-05,
"loss": 1.3131,
"step": 13240
},
{
"epoch": 9.9,
"learning_rate": 3.721696285223938e-05,
"loss": 1.3392,
"step": 13250
},
{
"epoch": 9.91,
"learning_rate": 3.720312240491613e-05,
"loss": 1.374,
"step": 13260
},
{
"epoch": 9.91,
"learning_rate": 3.718928195759287e-05,
"loss": 1.3191,
"step": 13270
},
{
"epoch": 9.92,
"learning_rate": 3.717544151026962e-05,
"loss": 1.3503,
"step": 13280
},
{
"epoch": 9.93,
"learning_rate": 3.7161601062946355e-05,
"loss": 1.361,
"step": 13290
},
{
"epoch": 9.94,
"learning_rate": 3.71477606156231e-05,
"loss": 1.3748,
"step": 13300
},
{
"epoch": 9.94,
"learning_rate": 3.713392016829984e-05,
"loss": 1.2906,
"step": 13310
},
{
"epoch": 9.95,
"learning_rate": 3.712007972097659e-05,
"loss": 1.3676,
"step": 13320
},
{
"epoch": 9.96,
"learning_rate": 3.710623927365333e-05,
"loss": 1.3187,
"step": 13330
},
{
"epoch": 9.97,
"learning_rate": 3.709239882633007e-05,
"loss": 1.3831,
"step": 13340
},
{
"epoch": 9.97,
"learning_rate": 3.707855837900681e-05,
"loss": 1.3529,
"step": 13350
},
{
"epoch": 9.98,
"learning_rate": 3.7064717931683554e-05,
"loss": 1.3264,
"step": 13360
},
{
"epoch": 9.99,
"learning_rate": 3.70508774843603e-05,
"loss": 1.3594,
"step": 13370
},
{
"epoch": 10.0,
"learning_rate": 3.7037037037037037e-05,
"loss": 1.3555,
"step": 13380
},
{
"epoch": 10.0,
"eval_accuracy": 0.5693051105625295,
"eval_loss": 0.9972643256187439,
"eval_runtime": 240.7214,
"eval_samples_per_second": 79.091,
"eval_steps_per_second": 2.472,
"step": 13385
},
{
"epoch": 10.0,
"learning_rate": 3.702319658971378e-05,
"loss": 1.3562,
"step": 13390
},
{
"epoch": 10.01,
"learning_rate": 3.700935614239052e-05,
"loss": 1.3272,
"step": 13400
},
{
"epoch": 10.02,
"learning_rate": 3.699551569506727e-05,
"loss": 1.3645,
"step": 13410
},
{
"epoch": 10.03,
"learning_rate": 3.698167524774401e-05,
"loss": 1.3399,
"step": 13420
},
{
"epoch": 10.03,
"learning_rate": 3.696783480042075e-05,
"loss": 1.298,
"step": 13430
},
{
"epoch": 10.04,
"learning_rate": 3.695399435309749e-05,
"loss": 1.3199,
"step": 13440
},
{
"epoch": 10.05,
"learning_rate": 3.6940153905774235e-05,
"loss": 1.3075,
"step": 13450
},
{
"epoch": 10.06,
"learning_rate": 3.692631345845098e-05,
"loss": 1.3463,
"step": 13460
},
{
"epoch": 10.06,
"learning_rate": 3.6912473011127725e-05,
"loss": 1.3237,
"step": 13470
},
{
"epoch": 10.07,
"learning_rate": 3.689863256380446e-05,
"loss": 1.3499,
"step": 13480
},
{
"epoch": 10.08,
"learning_rate": 3.68847921164812e-05,
"loss": 1.3259,
"step": 13490
},
{
"epoch": 10.09,
"learning_rate": 3.687095166915795e-05,
"loss": 1.3177,
"step": 13500
},
{
"epoch": 10.09,
"learning_rate": 3.685711122183469e-05,
"loss": 1.3397,
"step": 13510
},
{
"epoch": 10.1,
"learning_rate": 3.6843270774511434e-05,
"loss": 1.3263,
"step": 13520
},
{
"epoch": 10.11,
"learning_rate": 3.682943032718817e-05,
"loss": 1.338,
"step": 13530
},
{
"epoch": 10.12,
"learning_rate": 3.681558987986492e-05,
"loss": 1.3753,
"step": 13540
},
{
"epoch": 10.12,
"learning_rate": 3.680174943254166e-05,
"loss": 1.3382,
"step": 13550
},
{
"epoch": 10.13,
"learning_rate": 3.6787908985218406e-05,
"loss": 1.32,
"step": 13560
},
{
"epoch": 10.14,
"learning_rate": 3.6774068537895144e-05,
"loss": 1.3694,
"step": 13570
},
{
"epoch": 10.15,
"learning_rate": 3.676022809057189e-05,
"loss": 1.3044,
"step": 13580
},
{
"epoch": 10.15,
"learning_rate": 3.674638764324863e-05,
"loss": 1.3439,
"step": 13590
},
{
"epoch": 10.16,
"learning_rate": 3.673254719592538e-05,
"loss": 1.3611,
"step": 13600
},
{
"epoch": 10.17,
"learning_rate": 3.6718706748602116e-05,
"loss": 1.3254,
"step": 13610
},
{
"epoch": 10.18,
"learning_rate": 3.670486630127886e-05,
"loss": 1.3478,
"step": 13620
},
{
"epoch": 10.18,
"learning_rate": 3.66910258539556e-05,
"loss": 1.3306,
"step": 13630
},
{
"epoch": 10.19,
"learning_rate": 3.667718540663234e-05,
"loss": 1.3379,
"step": 13640
},
{
"epoch": 10.2,
"learning_rate": 3.666334495930909e-05,
"loss": 1.3351,
"step": 13650
},
{
"epoch": 10.21,
"learning_rate": 3.6649504511985826e-05,
"loss": 1.3494,
"step": 13660
},
{
"epoch": 10.21,
"learning_rate": 3.663566406466257e-05,
"loss": 1.3381,
"step": 13670
},
{
"epoch": 10.22,
"learning_rate": 3.6621823617339315e-05,
"loss": 1.3635,
"step": 13680
},
{
"epoch": 10.23,
"learning_rate": 3.660798317001606e-05,
"loss": 1.3158,
"step": 13690
},
{
"epoch": 10.24,
"learning_rate": 3.65941427226928e-05,
"loss": 1.3379,
"step": 13700
},
{
"epoch": 10.24,
"learning_rate": 3.658030227536954e-05,
"loss": 1.2988,
"step": 13710
},
{
"epoch": 10.25,
"learning_rate": 3.656646182804629e-05,
"loss": 1.314,
"step": 13720
},
{
"epoch": 10.26,
"learning_rate": 3.655262138072303e-05,
"loss": 1.3519,
"step": 13730
},
{
"epoch": 10.27,
"learning_rate": 3.653878093339977e-05,
"loss": 1.3086,
"step": 13740
},
{
"epoch": 10.27,
"learning_rate": 3.6524940486076514e-05,
"loss": 1.344,
"step": 13750
},
{
"epoch": 10.28,
"learning_rate": 3.651110003875325e-05,
"loss": 1.3455,
"step": 13760
},
{
"epoch": 10.29,
"learning_rate": 3.6497259591429996e-05,
"loss": 1.346,
"step": 13770
},
{
"epoch": 10.3,
"learning_rate": 3.648341914410674e-05,
"loss": 1.3306,
"step": 13780
},
{
"epoch": 10.3,
"learning_rate": 3.646957869678348e-05,
"loss": 1.3653,
"step": 13790
},
{
"epoch": 10.31,
"learning_rate": 3.6455738249460223e-05,
"loss": 1.3539,
"step": 13800
},
{
"epoch": 10.32,
"learning_rate": 3.644189780213697e-05,
"loss": 1.2984,
"step": 13810
},
{
"epoch": 10.32,
"learning_rate": 3.642805735481371e-05,
"loss": 1.3626,
"step": 13820
},
{
"epoch": 10.33,
"learning_rate": 3.641421690749045e-05,
"loss": 1.3382,
"step": 13830
},
{
"epoch": 10.34,
"learning_rate": 3.6400376460167195e-05,
"loss": 1.3283,
"step": 13840
},
{
"epoch": 10.35,
"learning_rate": 3.638653601284393e-05,
"loss": 1.3746,
"step": 13850
},
{
"epoch": 10.35,
"learning_rate": 3.6372695565520685e-05,
"loss": 1.3581,
"step": 13860
},
{
"epoch": 10.36,
"learning_rate": 3.635885511819742e-05,
"loss": 1.3348,
"step": 13870
},
{
"epoch": 10.37,
"learning_rate": 3.634501467087417e-05,
"loss": 1.3973,
"step": 13880
},
{
"epoch": 10.38,
"learning_rate": 3.6331174223550905e-05,
"loss": 1.3467,
"step": 13890
},
{
"epoch": 10.38,
"learning_rate": 3.631733377622765e-05,
"loss": 1.3263,
"step": 13900
},
{
"epoch": 10.39,
"learning_rate": 3.6303493328904394e-05,
"loss": 1.3416,
"step": 13910
},
{
"epoch": 10.4,
"learning_rate": 3.628965288158113e-05,
"loss": 1.3118,
"step": 13920
},
{
"epoch": 10.41,
"learning_rate": 3.627581243425788e-05,
"loss": 1.3581,
"step": 13930
},
{
"epoch": 10.41,
"learning_rate": 3.6261971986934615e-05,
"loss": 1.3306,
"step": 13940
},
{
"epoch": 10.42,
"learning_rate": 3.6248131539611366e-05,
"loss": 1.3573,
"step": 13950
},
{
"epoch": 10.43,
"learning_rate": 3.6234291092288104e-05,
"loss": 1.3603,
"step": 13960
},
{
"epoch": 10.44,
"learning_rate": 3.622045064496485e-05,
"loss": 1.3159,
"step": 13970
},
{
"epoch": 10.44,
"learning_rate": 3.6206610197641586e-05,
"loss": 1.3427,
"step": 13980
},
{
"epoch": 10.45,
"learning_rate": 3.619276975031833e-05,
"loss": 1.3453,
"step": 13990
},
{
"epoch": 10.46,
"learning_rate": 3.6178929302995076e-05,
"loss": 1.3396,
"step": 14000
},
{
"epoch": 10.47,
"learning_rate": 3.616508885567182e-05,
"loss": 1.364,
"step": 14010
},
{
"epoch": 10.47,
"learning_rate": 3.615124840834856e-05,
"loss": 1.3459,
"step": 14020
},
{
"epoch": 10.48,
"learning_rate": 3.61374079610253e-05,
"loss": 1.3709,
"step": 14030
},
{
"epoch": 10.49,
"learning_rate": 3.612356751370205e-05,
"loss": 1.3108,
"step": 14040
},
{
"epoch": 10.5,
"learning_rate": 3.6109727066378785e-05,
"loss": 1.3705,
"step": 14050
},
{
"epoch": 10.5,
"learning_rate": 3.609588661905553e-05,
"loss": 1.3664,
"step": 14060
},
{
"epoch": 10.51,
"learning_rate": 3.608204617173227e-05,
"loss": 1.3253,
"step": 14070
},
{
"epoch": 10.52,
"learning_rate": 3.606820572440901e-05,
"loss": 1.3415,
"step": 14080
},
{
"epoch": 10.53,
"learning_rate": 3.605436527708576e-05,
"loss": 1.3784,
"step": 14090
},
{
"epoch": 10.53,
"learning_rate": 3.60405248297625e-05,
"loss": 1.3291,
"step": 14100
},
{
"epoch": 10.54,
"learning_rate": 3.602668438243924e-05,
"loss": 1.3393,
"step": 14110
},
{
"epoch": 10.55,
"learning_rate": 3.6012843935115984e-05,
"loss": 1.3578,
"step": 14120
},
{
"epoch": 10.56,
"learning_rate": 3.599900348779273e-05,
"loss": 1.3298,
"step": 14130
},
{
"epoch": 10.56,
"learning_rate": 3.5985163040469473e-05,
"loss": 1.3473,
"step": 14140
},
{
"epoch": 10.57,
"learning_rate": 3.597132259314621e-05,
"loss": 1.3187,
"step": 14150
},
{
"epoch": 10.58,
"learning_rate": 3.5957482145822956e-05,
"loss": 1.3373,
"step": 14160
},
{
"epoch": 10.59,
"learning_rate": 3.5943641698499694e-05,
"loss": 1.3025,
"step": 14170
},
{
"epoch": 10.59,
"learning_rate": 3.592980125117644e-05,
"loss": 1.358,
"step": 14180
},
{
"epoch": 10.6,
"learning_rate": 3.591596080385318e-05,
"loss": 1.322,
"step": 14190
},
{
"epoch": 10.61,
"learning_rate": 3.590212035652992e-05,
"loss": 1.3745,
"step": 14200
},
{
"epoch": 10.62,
"learning_rate": 3.5888279909206666e-05,
"loss": 1.3442,
"step": 14210
},
{
"epoch": 10.62,
"learning_rate": 3.587443946188341e-05,
"loss": 1.3131,
"step": 14220
},
{
"epoch": 10.63,
"learning_rate": 3.5860599014560155e-05,
"loss": 1.3569,
"step": 14230
},
{
"epoch": 10.64,
"learning_rate": 3.584675856723689e-05,
"loss": 1.3541,
"step": 14240
},
{
"epoch": 10.65,
"learning_rate": 3.583291811991364e-05,
"loss": 1.312,
"step": 14250
},
{
"epoch": 10.65,
"learning_rate": 3.581907767259038e-05,
"loss": 1.3361,
"step": 14260
},
{
"epoch": 10.66,
"learning_rate": 3.580523722526713e-05,
"loss": 1.3693,
"step": 14270
},
{
"epoch": 10.67,
"learning_rate": 3.5791396777943865e-05,
"loss": 1.3249,
"step": 14280
},
{
"epoch": 10.68,
"learning_rate": 3.577755633062061e-05,
"loss": 1.3255,
"step": 14290
},
{
"epoch": 10.68,
"learning_rate": 3.576371588329735e-05,
"loss": 1.3798,
"step": 14300
},
{
"epoch": 10.69,
"learning_rate": 3.574987543597409e-05,
"loss": 1.3419,
"step": 14310
},
{
"epoch": 10.7,
"learning_rate": 3.5736034988650836e-05,
"loss": 1.3281,
"step": 14320
},
{
"epoch": 10.71,
"learning_rate": 3.5722194541327574e-05,
"loss": 1.3473,
"step": 14330
},
{
"epoch": 10.71,
"learning_rate": 3.570835409400432e-05,
"loss": 1.3382,
"step": 14340
},
{
"epoch": 10.72,
"learning_rate": 3.5694513646681064e-05,
"loss": 1.3411,
"step": 14350
},
{
"epoch": 10.73,
"learning_rate": 3.568067319935781e-05,
"loss": 1.3155,
"step": 14360
},
{
"epoch": 10.74,
"learning_rate": 3.5666832752034546e-05,
"loss": 1.3344,
"step": 14370
},
{
"epoch": 10.74,
"learning_rate": 3.565299230471129e-05,
"loss": 1.3273,
"step": 14380
},
{
"epoch": 10.75,
"learning_rate": 3.563915185738803e-05,
"loss": 1.3099,
"step": 14390
},
{
"epoch": 10.76,
"learning_rate": 3.562531141006478e-05,
"loss": 1.3634,
"step": 14400
},
{
"epoch": 10.77,
"learning_rate": 3.561147096274152e-05,
"loss": 1.3574,
"step": 14410
},
{
"epoch": 10.77,
"learning_rate": 3.559763051541826e-05,
"loss": 1.3101,
"step": 14420
},
{
"epoch": 10.78,
"learning_rate": 3.5583790068095e-05,
"loss": 1.3419,
"step": 14430
},
{
"epoch": 10.79,
"learning_rate": 3.5569949620771745e-05,
"loss": 1.3641,
"step": 14440
},
{
"epoch": 10.8,
"learning_rate": 3.555610917344849e-05,
"loss": 1.3437,
"step": 14450
},
{
"epoch": 10.8,
"learning_rate": 3.554226872612523e-05,
"loss": 1.316,
"step": 14460
},
{
"epoch": 10.81,
"learning_rate": 3.552842827880197e-05,
"loss": 1.3432,
"step": 14470
},
{
"epoch": 10.82,
"learning_rate": 3.551458783147871e-05,
"loss": 1.3295,
"step": 14480
},
{
"epoch": 10.83,
"learning_rate": 3.550074738415546e-05,
"loss": 1.3664,
"step": 14490
},
{
"epoch": 10.83,
"learning_rate": 3.54869069368322e-05,
"loss": 1.3623,
"step": 14500
},
{
"epoch": 10.84,
"learning_rate": 3.5473066489508944e-05,
"loss": 1.3287,
"step": 14510
},
{
"epoch": 10.85,
"learning_rate": 3.545922604218568e-05,
"loss": 1.3413,
"step": 14520
},
{
"epoch": 10.86,
"learning_rate": 3.5445385594862426e-05,
"loss": 1.3268,
"step": 14530
},
{
"epoch": 10.86,
"learning_rate": 3.543154514753917e-05,
"loss": 1.304,
"step": 14540
},
{
"epoch": 10.87,
"learning_rate": 3.5417704700215916e-05,
"loss": 1.3271,
"step": 14550
},
{
"epoch": 10.88,
"learning_rate": 3.5403864252892654e-05,
"loss": 1.3233,
"step": 14560
},
{
"epoch": 10.89,
"learning_rate": 3.53900238055694e-05,
"loss": 1.3484,
"step": 14570
},
{
"epoch": 10.89,
"learning_rate": 3.537618335824614e-05,
"loss": 1.3503,
"step": 14580
},
{
"epoch": 10.9,
"learning_rate": 3.536234291092289e-05,
"loss": 1.3579,
"step": 14590
},
{
"epoch": 10.91,
"learning_rate": 3.5348502463599625e-05,
"loss": 1.3572,
"step": 14600
},
{
"epoch": 10.92,
"learning_rate": 3.533466201627636e-05,
"loss": 1.3513,
"step": 14610
},
{
"epoch": 10.92,
"learning_rate": 3.532082156895311e-05,
"loss": 1.2904,
"step": 14620
},
{
"epoch": 10.93,
"learning_rate": 3.530698112162985e-05,
"loss": 1.3531,
"step": 14630
},
{
"epoch": 10.94,
"learning_rate": 3.52931406743066e-05,
"loss": 1.3887,
"step": 14640
},
{
"epoch": 10.95,
"learning_rate": 3.5279300226983335e-05,
"loss": 1.3493,
"step": 14650
},
{
"epoch": 10.95,
"learning_rate": 3.526545977966008e-05,
"loss": 1.3151,
"step": 14660
},
{
"epoch": 10.96,
"learning_rate": 3.5251619332336824e-05,
"loss": 1.2824,
"step": 14670
},
{
"epoch": 10.97,
"learning_rate": 3.523777888501357e-05,
"loss": 1.3727,
"step": 14680
},
{
"epoch": 10.97,
"learning_rate": 3.522393843769031e-05,
"loss": 1.3781,
"step": 14690
},
{
"epoch": 10.98,
"learning_rate": 3.521009799036705e-05,
"loss": 1.3222,
"step": 14700
},
{
"epoch": 10.99,
"learning_rate": 3.519625754304379e-05,
"loss": 1.3845,
"step": 14710
},
{
"epoch": 11.0,
"learning_rate": 3.518241709572054e-05,
"loss": 1.3303,
"step": 14720
},
{
"epoch": 11.0,
"eval_accuracy": 0.5718787751457535,
"eval_loss": 0.9952067732810974,
"eval_runtime": 228.5023,
"eval_samples_per_second": 83.321,
"eval_steps_per_second": 2.604,
"step": 14723
},
{
"epoch": 11.0,
"learning_rate": 3.516857664839728e-05,
"loss": 1.3038,
"step": 14730
},
{
"epoch": 11.01,
"learning_rate": 3.5154736201074017e-05,
"loss": 1.3117,
"step": 14740
},
{
"epoch": 11.02,
"learning_rate": 3.514089575375076e-05,
"loss": 1.2895,
"step": 14750
},
{
"epoch": 11.03,
"learning_rate": 3.5127055306427506e-05,
"loss": 1.3037,
"step": 14760
},
{
"epoch": 11.03,
"learning_rate": 3.511321485910425e-05,
"loss": 1.3403,
"step": 14770
},
{
"epoch": 11.04,
"learning_rate": 3.509937441178099e-05,
"loss": 1.3293,
"step": 14780
},
{
"epoch": 11.05,
"learning_rate": 3.508553396445773e-05,
"loss": 1.3504,
"step": 14790
},
{
"epoch": 11.06,
"learning_rate": 3.507169351713447e-05,
"loss": 1.3506,
"step": 14800
},
{
"epoch": 11.06,
"learning_rate": 3.505785306981122e-05,
"loss": 1.3673,
"step": 14810
},
{
"epoch": 11.07,
"learning_rate": 3.504401262248796e-05,
"loss": 1.3579,
"step": 14820
},
{
"epoch": 11.08,
"learning_rate": 3.5030172175164705e-05,
"loss": 1.3782,
"step": 14830
},
{
"epoch": 11.09,
"learning_rate": 3.501633172784144e-05,
"loss": 1.3376,
"step": 14840
},
{
"epoch": 11.09,
"learning_rate": 3.500249128051819e-05,
"loss": 1.3498,
"step": 14850
},
{
"epoch": 11.1,
"learning_rate": 3.498865083319493e-05,
"loss": 1.3268,
"step": 14860
},
{
"epoch": 11.11,
"learning_rate": 3.4974810385871677e-05,
"loss": 1.3072,
"step": 14870
},
{
"epoch": 11.12,
"learning_rate": 3.4960969938548414e-05,
"loss": 1.3573,
"step": 14880
},
{
"epoch": 11.12,
"learning_rate": 3.494712949122516e-05,
"loss": 1.3254,
"step": 14890
},
{
"epoch": 11.13,
"learning_rate": 3.4933289043901904e-05,
"loss": 1.325,
"step": 14900
},
{
"epoch": 11.14,
"learning_rate": 3.491944859657864e-05,
"loss": 1.3173,
"step": 14910
},
{
"epoch": 11.15,
"learning_rate": 3.4905608149255386e-05,
"loss": 1.3826,
"step": 14920
},
{
"epoch": 11.15,
"learning_rate": 3.4891767701932124e-05,
"loss": 1.2676,
"step": 14930
},
{
"epoch": 11.16,
"learning_rate": 3.487792725460887e-05,
"loss": 1.3091,
"step": 14940
},
{
"epoch": 11.17,
"learning_rate": 3.486408680728561e-05,
"loss": 1.2913,
"step": 14950
},
{
"epoch": 11.18,
"learning_rate": 3.485024635996236e-05,
"loss": 1.3273,
"step": 14960
},
{
"epoch": 11.18,
"learning_rate": 3.4836405912639096e-05,
"loss": 1.3489,
"step": 14970
},
{
"epoch": 11.19,
"learning_rate": 3.482256546531584e-05,
"loss": 1.3624,
"step": 14980
},
{
"epoch": 11.2,
"learning_rate": 3.4808725017992585e-05,
"loss": 1.3472,
"step": 14990
},
{
"epoch": 11.21,
"learning_rate": 3.479488457066933e-05,
"loss": 1.3239,
"step": 15000
},
{
"epoch": 11.21,
"learning_rate": 3.478104412334607e-05,
"loss": 1.3643,
"step": 15010
},
{
"epoch": 11.22,
"learning_rate": 3.4767203676022805e-05,
"loss": 1.336,
"step": 15020
},
{
"epoch": 11.23,
"learning_rate": 3.475336322869956e-05,
"loss": 1.3317,
"step": 15030
},
{
"epoch": 11.24,
"learning_rate": 3.4739522781376295e-05,
"loss": 1.3409,
"step": 15040
},
{
"epoch": 11.24,
"learning_rate": 3.472568233405304e-05,
"loss": 1.3141,
"step": 15050
},
{
"epoch": 11.25,
"learning_rate": 3.471184188672978e-05,
"loss": 1.3188,
"step": 15060
},
{
"epoch": 11.26,
"learning_rate": 3.469800143940652e-05,
"loss": 1.3322,
"step": 15070
},
{
"epoch": 11.27,
"learning_rate": 3.4684160992083267e-05,
"loss": 1.3358,
"step": 15080
},
{
"epoch": 11.27,
"learning_rate": 3.467032054476001e-05,
"loss": 1.3132,
"step": 15090
},
{
"epoch": 11.28,
"learning_rate": 3.465648009743675e-05,
"loss": 1.3225,
"step": 15100
},
{
"epoch": 11.29,
"learning_rate": 3.4642639650113494e-05,
"loss": 1.3074,
"step": 15110
},
{
"epoch": 11.3,
"learning_rate": 3.462879920279024e-05,
"loss": 1.3602,
"step": 15120
},
{
"epoch": 11.3,
"learning_rate": 3.461495875546698e-05,
"loss": 1.3188,
"step": 15130
},
{
"epoch": 11.31,
"learning_rate": 3.460111830814372e-05,
"loss": 1.2925,
"step": 15140
},
{
"epoch": 11.32,
"learning_rate": 3.458727786082046e-05,
"loss": 1.3425,
"step": 15150
},
{
"epoch": 11.33,
"learning_rate": 3.45734374134972e-05,
"loss": 1.3003,
"step": 15160
},
{
"epoch": 11.33,
"learning_rate": 3.455959696617395e-05,
"loss": 1.3292,
"step": 15170
},
{
"epoch": 11.34,
"learning_rate": 3.454575651885069e-05,
"loss": 1.3131,
"step": 15180
},
{
"epoch": 11.35,
"learning_rate": 3.453191607152743e-05,
"loss": 1.3773,
"step": 15190
},
{
"epoch": 11.36,
"learning_rate": 3.4518075624204175e-05,
"loss": 1.3217,
"step": 15200
},
{
"epoch": 11.36,
"learning_rate": 3.450423517688092e-05,
"loss": 1.3527,
"step": 15210
},
{
"epoch": 11.37,
"learning_rate": 3.4490394729557664e-05,
"loss": 1.3042,
"step": 15220
},
{
"epoch": 11.38,
"learning_rate": 3.44765542822344e-05,
"loss": 1.2948,
"step": 15230
},
{
"epoch": 11.39,
"learning_rate": 3.446271383491115e-05,
"loss": 1.3549,
"step": 15240
},
{
"epoch": 11.39,
"learning_rate": 3.4448873387587885e-05,
"loss": 1.3193,
"step": 15250
},
{
"epoch": 11.4,
"learning_rate": 3.4435032940264636e-05,
"loss": 1.3265,
"step": 15260
},
{
"epoch": 11.41,
"learning_rate": 3.4421192492941374e-05,
"loss": 1.3517,
"step": 15270
},
{
"epoch": 11.42,
"learning_rate": 3.440735204561812e-05,
"loss": 1.335,
"step": 15280
},
{
"epoch": 11.42,
"learning_rate": 3.439351159829486e-05,
"loss": 1.3515,
"step": 15290
},
{
"epoch": 11.43,
"learning_rate": 3.43796711509716e-05,
"loss": 1.3621,
"step": 15300
},
{
"epoch": 11.44,
"learning_rate": 3.4365830703648346e-05,
"loss": 1.3779,
"step": 15310
},
{
"epoch": 11.45,
"learning_rate": 3.4351990256325084e-05,
"loss": 1.3131,
"step": 15320
},
{
"epoch": 11.45,
"learning_rate": 3.433814980900183e-05,
"loss": 1.3209,
"step": 15330
},
{
"epoch": 11.46,
"learning_rate": 3.4324309361678566e-05,
"loss": 1.3214,
"step": 15340
},
{
"epoch": 11.47,
"learning_rate": 3.431046891435532e-05,
"loss": 1.3499,
"step": 15350
},
{
"epoch": 11.48,
"learning_rate": 3.4296628467032056e-05,
"loss": 1.3319,
"step": 15360
},
{
"epoch": 11.48,
"learning_rate": 3.42827880197088e-05,
"loss": 1.3792,
"step": 15370
},
{
"epoch": 11.49,
"learning_rate": 3.426894757238554e-05,
"loss": 1.3609,
"step": 15380
},
{
"epoch": 11.5,
"learning_rate": 3.425510712506228e-05,
"loss": 1.3116,
"step": 15390
},
{
"epoch": 11.51,
"learning_rate": 3.424126667773903e-05,
"loss": 1.3356,
"step": 15400
},
{
"epoch": 11.51,
"learning_rate": 3.422742623041577e-05,
"loss": 1.3484,
"step": 15410
},
{
"epoch": 11.52,
"learning_rate": 3.421358578309251e-05,
"loss": 1.3181,
"step": 15420
},
{
"epoch": 11.53,
"learning_rate": 3.419974533576925e-05,
"loss": 1.2937,
"step": 15430
},
{
"epoch": 11.54,
"learning_rate": 3.4185904888446e-05,
"loss": 1.3206,
"step": 15440
},
{
"epoch": 11.54,
"learning_rate": 3.417206444112274e-05,
"loss": 1.3127,
"step": 15450
},
{
"epoch": 11.55,
"learning_rate": 3.415822399379948e-05,
"loss": 1.3331,
"step": 15460
},
{
"epoch": 11.56,
"learning_rate": 3.414438354647622e-05,
"loss": 1.329,
"step": 15470
},
{
"epoch": 11.57,
"learning_rate": 3.4130543099152964e-05,
"loss": 1.3609,
"step": 15480
},
{
"epoch": 11.57,
"learning_rate": 3.411670265182971e-05,
"loss": 1.3157,
"step": 15490
},
{
"epoch": 11.58,
"learning_rate": 3.4102862204506453e-05,
"loss": 1.3118,
"step": 15500
},
{
"epoch": 11.59,
"learning_rate": 3.408902175718319e-05,
"loss": 1.3433,
"step": 15510
},
{
"epoch": 11.6,
"learning_rate": 3.4075181309859936e-05,
"loss": 1.3441,
"step": 15520
},
{
"epoch": 11.6,
"learning_rate": 3.406134086253668e-05,
"loss": 1.312,
"step": 15530
},
{
"epoch": 11.61,
"learning_rate": 3.4047500415213425e-05,
"loss": 1.3227,
"step": 15540
},
{
"epoch": 11.62,
"learning_rate": 3.403365996789016e-05,
"loss": 1.3329,
"step": 15550
},
{
"epoch": 11.62,
"learning_rate": 3.401981952056691e-05,
"loss": 1.3345,
"step": 15560
},
{
"epoch": 11.63,
"learning_rate": 3.4005979073243646e-05,
"loss": 1.3759,
"step": 15570
},
{
"epoch": 11.64,
"learning_rate": 3.399213862592039e-05,
"loss": 1.35,
"step": 15580
},
{
"epoch": 11.65,
"learning_rate": 3.3978298178597135e-05,
"loss": 1.3601,
"step": 15590
},
{
"epoch": 11.65,
"learning_rate": 3.396445773127387e-05,
"loss": 1.3344,
"step": 15600
},
{
"epoch": 11.66,
"learning_rate": 3.395061728395062e-05,
"loss": 1.3371,
"step": 15610
},
{
"epoch": 11.67,
"learning_rate": 3.393677683662736e-05,
"loss": 1.326,
"step": 15620
},
{
"epoch": 11.68,
"learning_rate": 3.392293638930411e-05,
"loss": 1.3121,
"step": 15630
},
{
"epoch": 11.68,
"learning_rate": 3.3909095941980845e-05,
"loss": 1.3577,
"step": 15640
},
{
"epoch": 11.69,
"learning_rate": 3.389525549465759e-05,
"loss": 1.3075,
"step": 15650
},
{
"epoch": 11.7,
"learning_rate": 3.3881415047334334e-05,
"loss": 1.3482,
"step": 15660
},
{
"epoch": 11.71,
"learning_rate": 3.386757460001108e-05,
"loss": 1.3449,
"step": 15670
},
{
"epoch": 11.71,
"learning_rate": 3.3853734152687816e-05,
"loss": 1.3399,
"step": 15680
},
{
"epoch": 11.72,
"learning_rate": 3.383989370536456e-05,
"loss": 1.3084,
"step": 15690
},
{
"epoch": 11.73,
"learning_rate": 3.38260532580413e-05,
"loss": 1.3223,
"step": 15700
},
{
"epoch": 11.74,
"learning_rate": 3.3812212810718043e-05,
"loss": 1.3103,
"step": 15710
},
{
"epoch": 11.74,
"learning_rate": 3.379837236339479e-05,
"loss": 1.3224,
"step": 15720
},
{
"epoch": 11.75,
"learning_rate": 3.3784531916071526e-05,
"loss": 1.302,
"step": 15730
},
{
"epoch": 11.76,
"learning_rate": 3.377069146874827e-05,
"loss": 1.3162,
"step": 15740
},
{
"epoch": 11.77,
"learning_rate": 3.3756851021425015e-05,
"loss": 1.2973,
"step": 15750
},
{
"epoch": 11.77,
"learning_rate": 3.374301057410176e-05,
"loss": 1.3187,
"step": 15760
},
{
"epoch": 11.78,
"learning_rate": 3.37291701267785e-05,
"loss": 1.3208,
"step": 15770
},
{
"epoch": 11.79,
"learning_rate": 3.371532967945524e-05,
"loss": 1.2874,
"step": 15780
},
{
"epoch": 11.8,
"learning_rate": 3.370148923213198e-05,
"loss": 1.3751,
"step": 15790
},
{
"epoch": 11.8,
"learning_rate": 3.368764878480873e-05,
"loss": 1.3221,
"step": 15800
},
{
"epoch": 11.81,
"learning_rate": 3.367380833748547e-05,
"loss": 1.3237,
"step": 15810
},
{
"epoch": 11.82,
"learning_rate": 3.3659967890162214e-05,
"loss": 1.3312,
"step": 15820
},
{
"epoch": 11.83,
"learning_rate": 3.364612744283895e-05,
"loss": 1.3351,
"step": 15830
},
{
"epoch": 11.83,
"learning_rate": 3.36322869955157e-05,
"loss": 1.3068,
"step": 15840
},
{
"epoch": 11.84,
"learning_rate": 3.361844654819244e-05,
"loss": 1.3722,
"step": 15850
},
{
"epoch": 11.85,
"learning_rate": 3.360460610086918e-05,
"loss": 1.3478,
"step": 15860
},
{
"epoch": 11.86,
"learning_rate": 3.3590765653545924e-05,
"loss": 1.3331,
"step": 15870
},
{
"epoch": 11.86,
"learning_rate": 3.357692520622266e-05,
"loss": 1.3233,
"step": 15880
},
{
"epoch": 11.87,
"learning_rate": 3.356308475889941e-05,
"loss": 1.3624,
"step": 15890
},
{
"epoch": 11.88,
"learning_rate": 3.354924431157615e-05,
"loss": 1.3364,
"step": 15900
},
{
"epoch": 11.89,
"learning_rate": 3.3535403864252896e-05,
"loss": 1.3444,
"step": 15910
},
{
"epoch": 11.89,
"learning_rate": 3.3521563416929634e-05,
"loss": 1.3378,
"step": 15920
},
{
"epoch": 11.9,
"learning_rate": 3.350772296960638e-05,
"loss": 1.3151,
"step": 15930
},
{
"epoch": 11.91,
"learning_rate": 3.349388252228312e-05,
"loss": 1.3703,
"step": 15940
},
{
"epoch": 11.92,
"learning_rate": 3.348004207495987e-05,
"loss": 1.3582,
"step": 15950
},
{
"epoch": 11.92,
"learning_rate": 3.3466201627636605e-05,
"loss": 1.3139,
"step": 15960
},
{
"epoch": 11.93,
"learning_rate": 3.345236118031335e-05,
"loss": 1.3319,
"step": 15970
},
{
"epoch": 11.94,
"learning_rate": 3.3438520732990095e-05,
"loss": 1.308,
"step": 15980
},
{
"epoch": 11.95,
"learning_rate": 3.342468028566683e-05,
"loss": 1.3096,
"step": 15990
},
{
"epoch": 11.95,
"learning_rate": 3.341083983834358e-05,
"loss": 1.313,
"step": 16000
},
{
"epoch": 11.96,
"learning_rate": 3.3396999391020315e-05,
"loss": 1.3371,
"step": 16010
},
{
"epoch": 11.97,
"learning_rate": 3.338315894369706e-05,
"loss": 1.3293,
"step": 16020
},
{
"epoch": 11.98,
"learning_rate": 3.3369318496373804e-05,
"loss": 1.3428,
"step": 16030
},
{
"epoch": 11.98,
"learning_rate": 3.335547804905055e-05,
"loss": 1.3229,
"step": 16040
},
{
"epoch": 11.99,
"learning_rate": 3.334163760172729e-05,
"loss": 1.3458,
"step": 16050
},
{
"epoch": 12.0,
"learning_rate": 3.332779715440403e-05,
"loss": 1.3575,
"step": 16060
},
{
"epoch": 12.0,
"eval_accuracy": 0.5574347392194968,
"eval_loss": 1.0317397117614746,
"eval_runtime": 249.9152,
"eval_samples_per_second": 76.182,
"eval_steps_per_second": 2.381,
"step": 16062
},
{
"epoch": 12.01,
"learning_rate": 3.3313956707080776e-05,
"loss": 1.3207,
"step": 16070
},
{
"epoch": 12.01,
"learning_rate": 3.330011625975752e-05,
"loss": 1.3374,
"step": 16080
},
{
"epoch": 12.02,
"learning_rate": 3.328627581243426e-05,
"loss": 1.3497,
"step": 16090
},
{
"epoch": 12.03,
"learning_rate": 3.3272435365111e-05,
"loss": 1.3376,
"step": 16100
},
{
"epoch": 12.04,
"learning_rate": 3.325859491778774e-05,
"loss": 1.3038,
"step": 16110
},
{
"epoch": 12.04,
"learning_rate": 3.324475447046449e-05,
"loss": 1.3117,
"step": 16120
},
{
"epoch": 12.05,
"learning_rate": 3.323091402314123e-05,
"loss": 1.3048,
"step": 16130
},
{
"epoch": 12.06,
"learning_rate": 3.321707357581797e-05,
"loss": 1.3266,
"step": 16140
},
{
"epoch": 12.07,
"learning_rate": 3.320323312849471e-05,
"loss": 1.314,
"step": 16150
},
{
"epoch": 12.07,
"learning_rate": 3.318939268117146e-05,
"loss": 1.3538,
"step": 16160
},
{
"epoch": 12.08,
"learning_rate": 3.31755522338482e-05,
"loss": 1.3559,
"step": 16170
},
{
"epoch": 12.09,
"learning_rate": 3.316171178652494e-05,
"loss": 1.3282,
"step": 16180
},
{
"epoch": 12.1,
"learning_rate": 3.3147871339201685e-05,
"loss": 1.3517,
"step": 16190
},
{
"epoch": 12.1,
"learning_rate": 3.313403089187842e-05,
"loss": 1.3388,
"step": 16200
},
{
"epoch": 12.11,
"learning_rate": 3.3120190444555174e-05,
"loss": 1.3367,
"step": 16210
},
{
"epoch": 12.12,
"learning_rate": 3.310634999723191e-05,
"loss": 1.3129,
"step": 16220
},
{
"epoch": 12.13,
"learning_rate": 3.3092509549908656e-05,
"loss": 1.3308,
"step": 16230
},
{
"epoch": 12.13,
"learning_rate": 3.3078669102585394e-05,
"loss": 1.2946,
"step": 16240
},
{
"epoch": 12.14,
"learning_rate": 3.306482865526214e-05,
"loss": 1.3199,
"step": 16250
},
{
"epoch": 12.15,
"learning_rate": 3.3050988207938884e-05,
"loss": 1.3099,
"step": 16260
},
{
"epoch": 12.16,
"learning_rate": 3.303714776061562e-05,
"loss": 1.335,
"step": 16270
},
{
"epoch": 12.16,
"learning_rate": 3.3023307313292366e-05,
"loss": 1.3219,
"step": 16280
},
{
"epoch": 12.17,
"learning_rate": 3.300946686596911e-05,
"loss": 1.3244,
"step": 16290
},
{
"epoch": 12.18,
"learning_rate": 3.2995626418645855e-05,
"loss": 1.3523,
"step": 16300
},
{
"epoch": 12.19,
"learning_rate": 3.298178597132259e-05,
"loss": 1.2925,
"step": 16310
},
{
"epoch": 12.19,
"learning_rate": 3.296794552399934e-05,
"loss": 1.3161,
"step": 16320
},
{
"epoch": 12.2,
"learning_rate": 3.2954105076676076e-05,
"loss": 1.3057,
"step": 16330
},
{
"epoch": 12.21,
"learning_rate": 3.294026462935283e-05,
"loss": 1.3592,
"step": 16340
},
{
"epoch": 12.22,
"learning_rate": 3.2926424182029565e-05,
"loss": 1.3392,
"step": 16350
},
{
"epoch": 12.22,
"learning_rate": 3.291258373470631e-05,
"loss": 1.3474,
"step": 16360
},
{
"epoch": 12.23,
"learning_rate": 3.289874328738305e-05,
"loss": 1.2814,
"step": 16370
},
{
"epoch": 12.24,
"learning_rate": 3.288490284005979e-05,
"loss": 1.3565,
"step": 16380
},
{
"epoch": 12.25,
"learning_rate": 3.287106239273654e-05,
"loss": 1.363,
"step": 16390
},
{
"epoch": 12.25,
"learning_rate": 3.2857221945413275e-05,
"loss": 1.3102,
"step": 16400
},
{
"epoch": 12.26,
"learning_rate": 3.284338149809002e-05,
"loss": 1.3008,
"step": 16410
},
{
"epoch": 12.27,
"learning_rate": 3.282954105076676e-05,
"loss": 1.3448,
"step": 16420
},
{
"epoch": 12.27,
"learning_rate": 3.281570060344351e-05,
"loss": 1.334,
"step": 16430
},
{
"epoch": 12.28,
"learning_rate": 3.2801860156120247e-05,
"loss": 1.3307,
"step": 16440
},
{
"epoch": 12.29,
"learning_rate": 3.278801970879699e-05,
"loss": 1.3571,
"step": 16450
},
{
"epoch": 12.3,
"learning_rate": 3.277417926147373e-05,
"loss": 1.3547,
"step": 16460
},
{
"epoch": 12.3,
"learning_rate": 3.2760338814150474e-05,
"loss": 1.3743,
"step": 16470
},
{
"epoch": 12.31,
"learning_rate": 3.274649836682722e-05,
"loss": 1.3272,
"step": 16480
},
{
"epoch": 12.32,
"learning_rate": 3.273265791950396e-05,
"loss": 1.2864,
"step": 16490
},
{
"epoch": 12.33,
"learning_rate": 3.27188174721807e-05,
"loss": 1.3099,
"step": 16500
},
{
"epoch": 12.33,
"learning_rate": 3.2704977024857445e-05,
"loss": 1.3474,
"step": 16510
},
{
"epoch": 12.34,
"learning_rate": 3.269113657753419e-05,
"loss": 1.3225,
"step": 16520
},
{
"epoch": 12.35,
"learning_rate": 3.2677296130210935e-05,
"loss": 1.3259,
"step": 16530
},
{
"epoch": 12.36,
"learning_rate": 3.266345568288767e-05,
"loss": 1.2979,
"step": 16540
},
{
"epoch": 12.36,
"learning_rate": 3.264961523556441e-05,
"loss": 1.3519,
"step": 16550
},
{
"epoch": 12.37,
"learning_rate": 3.2635774788241155e-05,
"loss": 1.3486,
"step": 16560
},
{
"epoch": 12.38,
"learning_rate": 3.26219343409179e-05,
"loss": 1.3259,
"step": 16570
},
{
"epoch": 12.39,
"learning_rate": 3.2608093893594644e-05,
"loss": 1.3349,
"step": 16580
},
{
"epoch": 12.39,
"learning_rate": 3.259425344627138e-05,
"loss": 1.3376,
"step": 16590
},
{
"epoch": 12.4,
"learning_rate": 3.258041299894813e-05,
"loss": 1.3514,
"step": 16600
},
{
"epoch": 12.41,
"learning_rate": 3.256657255162487e-05,
"loss": 1.332,
"step": 16610
},
{
"epoch": 12.42,
"learning_rate": 3.2552732104301616e-05,
"loss": 1.3178,
"step": 16620
},
{
"epoch": 12.42,
"learning_rate": 3.2538891656978354e-05,
"loss": 1.3376,
"step": 16630
},
{
"epoch": 12.43,
"learning_rate": 3.25250512096551e-05,
"loss": 1.2878,
"step": 16640
},
{
"epoch": 12.44,
"learning_rate": 3.2511210762331837e-05,
"loss": 1.3643,
"step": 16650
},
{
"epoch": 12.45,
"learning_rate": 3.249737031500859e-05,
"loss": 1.2923,
"step": 16660
},
{
"epoch": 12.45,
"learning_rate": 3.2483529867685326e-05,
"loss": 1.3066,
"step": 16670
},
{
"epoch": 12.46,
"learning_rate": 3.2469689420362064e-05,
"loss": 1.3387,
"step": 16680
},
{
"epoch": 12.47,
"learning_rate": 3.245584897303881e-05,
"loss": 1.3355,
"step": 16690
},
{
"epoch": 12.48,
"learning_rate": 3.244200852571555e-05,
"loss": 1.3036,
"step": 16700
},
{
"epoch": 12.48,
"learning_rate": 3.24281680783923e-05,
"loss": 1.3415,
"step": 16710
},
{
"epoch": 12.49,
"learning_rate": 3.2414327631069036e-05,
"loss": 1.3459,
"step": 16720
},
{
"epoch": 12.5,
"learning_rate": 3.240048718374578e-05,
"loss": 1.3434,
"step": 16730
},
{
"epoch": 12.51,
"learning_rate": 3.238664673642252e-05,
"loss": 1.3344,
"step": 16740
},
{
"epoch": 12.51,
"learning_rate": 3.237280628909927e-05,
"loss": 1.3158,
"step": 16750
},
{
"epoch": 12.52,
"learning_rate": 3.235896584177601e-05,
"loss": 1.3226,
"step": 16760
},
{
"epoch": 12.53,
"learning_rate": 3.234512539445275e-05,
"loss": 1.2927,
"step": 16770
},
{
"epoch": 12.54,
"learning_rate": 3.233128494712949e-05,
"loss": 1.3149,
"step": 16780
},
{
"epoch": 12.54,
"learning_rate": 3.2317444499806234e-05,
"loss": 1.3291,
"step": 16790
},
{
"epoch": 12.55,
"learning_rate": 3.230360405248298e-05,
"loss": 1.3523,
"step": 16800
},
{
"epoch": 12.56,
"learning_rate": 3.2289763605159724e-05,
"loss": 1.3153,
"step": 16810
},
{
"epoch": 12.57,
"learning_rate": 3.227592315783646e-05,
"loss": 1.2988,
"step": 16820
},
{
"epoch": 12.57,
"learning_rate": 3.2262082710513206e-05,
"loss": 1.3228,
"step": 16830
},
{
"epoch": 12.58,
"learning_rate": 3.224824226318995e-05,
"loss": 1.3018,
"step": 16840
},
{
"epoch": 12.59,
"learning_rate": 3.223440181586669e-05,
"loss": 1.3261,
"step": 16850
},
{
"epoch": 12.6,
"learning_rate": 3.2220561368543433e-05,
"loss": 1.2917,
"step": 16860
},
{
"epoch": 12.6,
"learning_rate": 3.220672092122017e-05,
"loss": 1.3257,
"step": 16870
},
{
"epoch": 12.61,
"learning_rate": 3.2192880473896916e-05,
"loss": 1.3367,
"step": 16880
},
{
"epoch": 12.62,
"learning_rate": 3.217904002657366e-05,
"loss": 1.3322,
"step": 16890
},
{
"epoch": 12.63,
"learning_rate": 3.2165199579250405e-05,
"loss": 1.3388,
"step": 16900
},
{
"epoch": 12.63,
"learning_rate": 3.215135913192714e-05,
"loss": 1.3034,
"step": 16910
},
{
"epoch": 12.64,
"learning_rate": 3.213751868460389e-05,
"loss": 1.3238,
"step": 16920
},
{
"epoch": 12.65,
"learning_rate": 3.212367823728063e-05,
"loss": 1.3152,
"step": 16930
},
{
"epoch": 12.66,
"learning_rate": 3.210983778995738e-05,
"loss": 1.3216,
"step": 16940
},
{
"epoch": 12.66,
"learning_rate": 3.2095997342634115e-05,
"loss": 1.3359,
"step": 16950
},
{
"epoch": 12.67,
"learning_rate": 3.208215689531085e-05,
"loss": 1.3665,
"step": 16960
},
{
"epoch": 12.68,
"learning_rate": 3.2068316447987604e-05,
"loss": 1.3041,
"step": 16970
},
{
"epoch": 12.69,
"learning_rate": 3.205447600066434e-05,
"loss": 1.3218,
"step": 16980
},
{
"epoch": 12.69,
"learning_rate": 3.204063555334109e-05,
"loss": 1.3106,
"step": 16990
},
{
"epoch": 12.7,
"learning_rate": 3.2026795106017825e-05,
"loss": 1.3523,
"step": 17000
},
{
"epoch": 12.71,
"learning_rate": 3.201295465869457e-05,
"loss": 1.3349,
"step": 17010
},
{
"epoch": 12.72,
"learning_rate": 3.1999114211371314e-05,
"loss": 1.3055,
"step": 17020
},
{
"epoch": 12.72,
"learning_rate": 3.198527376404806e-05,
"loss": 1.2833,
"step": 17030
},
{
"epoch": 12.73,
"learning_rate": 3.1971433316724796e-05,
"loss": 1.3393,
"step": 17040
},
{
"epoch": 12.74,
"learning_rate": 3.195759286940154e-05,
"loss": 1.3133,
"step": 17050
},
{
"epoch": 12.75,
"learning_rate": 3.1943752422078286e-05,
"loss": 1.2762,
"step": 17060
},
{
"epoch": 12.75,
"learning_rate": 3.192991197475503e-05,
"loss": 1.3425,
"step": 17070
},
{
"epoch": 12.76,
"learning_rate": 3.191607152743177e-05,
"loss": 1.3099,
"step": 17080
},
{
"epoch": 12.77,
"learning_rate": 3.190223108010851e-05,
"loss": 1.3254,
"step": 17090
},
{
"epoch": 12.78,
"learning_rate": 3.188839063278525e-05,
"loss": 1.3071,
"step": 17100
},
{
"epoch": 12.78,
"learning_rate": 3.1874550185461995e-05,
"loss": 1.3363,
"step": 17110
},
{
"epoch": 12.79,
"learning_rate": 3.186070973813874e-05,
"loss": 1.2928,
"step": 17120
},
{
"epoch": 12.8,
"learning_rate": 3.184686929081548e-05,
"loss": 1.3572,
"step": 17130
},
{
"epoch": 12.81,
"learning_rate": 3.183302884349222e-05,
"loss": 1.2769,
"step": 17140
},
{
"epoch": 12.81,
"learning_rate": 3.181918839616897e-05,
"loss": 1.3623,
"step": 17150
},
{
"epoch": 12.82,
"learning_rate": 3.180534794884571e-05,
"loss": 1.3186,
"step": 17160
},
{
"epoch": 12.83,
"learning_rate": 3.179150750152245e-05,
"loss": 1.3341,
"step": 17170
},
{
"epoch": 12.84,
"learning_rate": 3.1777667054199194e-05,
"loss": 1.31,
"step": 17180
},
{
"epoch": 12.84,
"learning_rate": 3.176382660687593e-05,
"loss": 1.3519,
"step": 17190
},
{
"epoch": 12.85,
"learning_rate": 3.1749986159552683e-05,
"loss": 1.3383,
"step": 17200
},
{
"epoch": 12.86,
"learning_rate": 3.173614571222942e-05,
"loss": 1.3192,
"step": 17210
},
{
"epoch": 12.87,
"learning_rate": 3.1722305264906166e-05,
"loss": 1.3314,
"step": 17220
},
{
"epoch": 12.87,
"learning_rate": 3.1708464817582904e-05,
"loss": 1.3621,
"step": 17230
},
{
"epoch": 12.88,
"learning_rate": 3.169462437025965e-05,
"loss": 1.2664,
"step": 17240
},
{
"epoch": 12.89,
"learning_rate": 3.168078392293639e-05,
"loss": 1.3306,
"step": 17250
},
{
"epoch": 12.9,
"learning_rate": 3.166694347561313e-05,
"loss": 1.3228,
"step": 17260
},
{
"epoch": 12.9,
"learning_rate": 3.1653103028289876e-05,
"loss": 1.3284,
"step": 17270
},
{
"epoch": 12.91,
"learning_rate": 3.1639262580966613e-05,
"loss": 1.3477,
"step": 17280
},
{
"epoch": 12.92,
"learning_rate": 3.1625422133643365e-05,
"loss": 1.3374,
"step": 17290
},
{
"epoch": 12.92,
"learning_rate": 3.16115816863201e-05,
"loss": 1.3436,
"step": 17300
},
{
"epoch": 12.93,
"learning_rate": 3.159774123899685e-05,
"loss": 1.3442,
"step": 17310
},
{
"epoch": 12.94,
"learning_rate": 3.1583900791673585e-05,
"loss": 1.3433,
"step": 17320
},
{
"epoch": 12.95,
"learning_rate": 3.157006034435033e-05,
"loss": 1.339,
"step": 17330
},
{
"epoch": 12.95,
"learning_rate": 3.1556219897027075e-05,
"loss": 1.3278,
"step": 17340
},
{
"epoch": 12.96,
"learning_rate": 3.154237944970382e-05,
"loss": 1.3202,
"step": 17350
},
{
"epoch": 12.97,
"learning_rate": 3.152853900238056e-05,
"loss": 1.3397,
"step": 17360
},
{
"epoch": 12.98,
"learning_rate": 3.15146985550573e-05,
"loss": 1.3654,
"step": 17370
},
{
"epoch": 12.98,
"learning_rate": 3.1500858107734046e-05,
"loss": 1.2884,
"step": 17380
},
{
"epoch": 12.99,
"learning_rate": 3.1487017660410784e-05,
"loss": 1.2874,
"step": 17390
},
{
"epoch": 13.0,
"learning_rate": 3.147317721308753e-05,
"loss": 1.3129,
"step": 17400
},
{
"epoch": 13.0,
"eval_accuracy": 0.581333053206576,
"eval_loss": 0.9851331114768982,
"eval_runtime": 214.3011,
"eval_samples_per_second": 88.842,
"eval_steps_per_second": 2.776,
"step": 17400
},
{
"epoch": 13.01,
"learning_rate": 3.145933676576427e-05,
"loss": 1.3295,
"step": 17410
},
{
"epoch": 13.01,
"learning_rate": 3.144549631844101e-05,
"loss": 1.3507,
"step": 17420
},
{
"epoch": 13.02,
"learning_rate": 3.1431655871117756e-05,
"loss": 1.3239,
"step": 17430
},
{
"epoch": 13.03,
"learning_rate": 3.14178154237945e-05,
"loss": 1.2824,
"step": 17440
},
{
"epoch": 13.04,
"learning_rate": 3.140397497647124e-05,
"loss": 1.3063,
"step": 17450
},
{
"epoch": 13.04,
"learning_rate": 3.139013452914798e-05,
"loss": 1.3081,
"step": 17460
},
{
"epoch": 13.05,
"learning_rate": 3.137629408182473e-05,
"loss": 1.3242,
"step": 17470
},
{
"epoch": 13.06,
"learning_rate": 3.136245363450147e-05,
"loss": 1.3078,
"step": 17480
},
{
"epoch": 13.07,
"learning_rate": 3.134861318717821e-05,
"loss": 1.3373,
"step": 17490
},
{
"epoch": 13.07,
"learning_rate": 3.1334772739854955e-05,
"loss": 1.2969,
"step": 17500
},
{
"epoch": 13.08,
"learning_rate": 3.132093229253169e-05,
"loss": 1.3524,
"step": 17510
},
{
"epoch": 13.09,
"learning_rate": 3.130709184520844e-05,
"loss": 1.276,
"step": 17520
},
{
"epoch": 13.1,
"learning_rate": 3.129325139788518e-05,
"loss": 1.3039,
"step": 17530
},
{
"epoch": 13.1,
"learning_rate": 3.127941095056192e-05,
"loss": 1.3099,
"step": 17540
},
{
"epoch": 13.11,
"learning_rate": 3.1265570503238665e-05,
"loss": 1.2954,
"step": 17550
},
{
"epoch": 13.12,
"learning_rate": 3.125173005591541e-05,
"loss": 1.3378,
"step": 17560
},
{
"epoch": 13.13,
"learning_rate": 3.1237889608592154e-05,
"loss": 1.3066,
"step": 17570
},
{
"epoch": 13.13,
"learning_rate": 3.122404916126889e-05,
"loss": 1.3123,
"step": 17580
},
{
"epoch": 13.14,
"learning_rate": 3.1210208713945636e-05,
"loss": 1.3436,
"step": 17590
},
{
"epoch": 13.15,
"learning_rate": 3.119636826662238e-05,
"loss": 1.2842,
"step": 17600
},
{
"epoch": 13.16,
"learning_rate": 3.1182527819299126e-05,
"loss": 1.331,
"step": 17610
},
{
"epoch": 13.16,
"learning_rate": 3.1168687371975864e-05,
"loss": 1.308,
"step": 17620
},
{
"epoch": 13.17,
"learning_rate": 3.115484692465261e-05,
"loss": 1.2773,
"step": 17630
},
{
"epoch": 13.18,
"learning_rate": 3.1141006477329346e-05,
"loss": 1.305,
"step": 17640
},
{
"epoch": 13.19,
"learning_rate": 3.112716603000609e-05,
"loss": 1.3134,
"step": 17650
},
{
"epoch": 13.19,
"learning_rate": 3.1113325582682835e-05,
"loss": 1.324,
"step": 17660
},
{
"epoch": 13.2,
"learning_rate": 3.109948513535957e-05,
"loss": 1.3107,
"step": 17670
},
{
"epoch": 13.21,
"learning_rate": 3.108564468803632e-05,
"loss": 1.3106,
"step": 17680
},
{
"epoch": 13.22,
"learning_rate": 3.107180424071306e-05,
"loss": 1.3491,
"step": 17690
},
{
"epoch": 13.22,
"learning_rate": 3.105796379338981e-05,
"loss": 1.3324,
"step": 17700
},
{
"epoch": 13.23,
"learning_rate": 3.1044123346066545e-05,
"loss": 1.3507,
"step": 17710
},
{
"epoch": 13.24,
"learning_rate": 3.103028289874329e-05,
"loss": 1.2962,
"step": 17720
},
{
"epoch": 13.25,
"learning_rate": 3.101644245142003e-05,
"loss": 1.3022,
"step": 17730
},
{
"epoch": 13.25,
"learning_rate": 3.100260200409678e-05,
"loss": 1.2955,
"step": 17740
},
{
"epoch": 13.26,
"learning_rate": 3.098876155677352e-05,
"loss": 1.3078,
"step": 17750
},
{
"epoch": 13.27,
"learning_rate": 3.097492110945026e-05,
"loss": 1.312,
"step": 17760
},
{
"epoch": 13.28,
"learning_rate": 3.0961080662127e-05,
"loss": 1.3142,
"step": 17770
},
{
"epoch": 13.28,
"learning_rate": 3.0947240214803744e-05,
"loss": 1.3305,
"step": 17780
},
{
"epoch": 13.29,
"learning_rate": 3.093339976748049e-05,
"loss": 1.3468,
"step": 17790
},
{
"epoch": 13.3,
"learning_rate": 3.0919559320157226e-05,
"loss": 1.3223,
"step": 17800
},
{
"epoch": 13.31,
"learning_rate": 3.090571887283397e-05,
"loss": 1.3462,
"step": 17810
},
{
"epoch": 13.31,
"learning_rate": 3.089187842551071e-05,
"loss": 1.3049,
"step": 17820
},
{
"epoch": 13.32,
"learning_rate": 3.087803797818746e-05,
"loss": 1.3556,
"step": 17830
},
{
"epoch": 13.33,
"learning_rate": 3.08641975308642e-05,
"loss": 1.299,
"step": 17840
},
{
"epoch": 13.34,
"learning_rate": 3.085035708354094e-05,
"loss": 1.3259,
"step": 17850
},
{
"epoch": 13.34,
"learning_rate": 3.083651663621768e-05,
"loss": 1.2768,
"step": 17860
},
{
"epoch": 13.35,
"learning_rate": 3.0822676188894425e-05,
"loss": 1.3298,
"step": 17870
},
{
"epoch": 13.36,
"learning_rate": 3.080883574157117e-05,
"loss": 1.3236,
"step": 17880
},
{
"epoch": 13.37,
"learning_rate": 3.0794995294247915e-05,
"loss": 1.3472,
"step": 17890
},
{
"epoch": 13.37,
"learning_rate": 3.078115484692465e-05,
"loss": 1.3162,
"step": 17900
},
{
"epoch": 13.38,
"learning_rate": 3.07673143996014e-05,
"loss": 1.325,
"step": 17910
},
{
"epoch": 13.39,
"learning_rate": 3.075347395227814e-05,
"loss": 1.2995,
"step": 17920
},
{
"epoch": 13.4,
"learning_rate": 3.073963350495488e-05,
"loss": 1.3174,
"step": 17930
},
{
"epoch": 13.4,
"learning_rate": 3.0725793057631624e-05,
"loss": 1.3216,
"step": 17940
},
{
"epoch": 13.41,
"learning_rate": 3.071195261030836e-05,
"loss": 1.2932,
"step": 17950
},
{
"epoch": 13.42,
"learning_rate": 3.069811216298511e-05,
"loss": 1.2962,
"step": 17960
},
{
"epoch": 13.43,
"learning_rate": 3.068427171566185e-05,
"loss": 1.3055,
"step": 17970
},
{
"epoch": 13.43,
"learning_rate": 3.0670431268338596e-05,
"loss": 1.308,
"step": 17980
},
{
"epoch": 13.44,
"learning_rate": 3.0656590821015334e-05,
"loss": 1.3151,
"step": 17990
},
{
"epoch": 13.45,
"learning_rate": 3.064275037369208e-05,
"loss": 1.3059,
"step": 18000
},
{
"epoch": 13.46,
"learning_rate": 3.062890992636882e-05,
"loss": 1.3255,
"step": 18010
},
{
"epoch": 13.46,
"learning_rate": 3.061506947904557e-05,
"loss": 1.3258,
"step": 18020
},
{
"epoch": 13.47,
"learning_rate": 3.0601229031722306e-05,
"loss": 1.3375,
"step": 18030
},
{
"epoch": 13.48,
"learning_rate": 3.058738858439905e-05,
"loss": 1.2906,
"step": 18040
},
{
"epoch": 13.49,
"learning_rate": 3.057354813707579e-05,
"loss": 1.2994,
"step": 18050
},
{
"epoch": 13.49,
"learning_rate": 3.055970768975254e-05,
"loss": 1.314,
"step": 18060
},
{
"epoch": 13.5,
"learning_rate": 3.054586724242928e-05,
"loss": 1.3469,
"step": 18070
},
{
"epoch": 13.51,
"learning_rate": 3.0532026795106015e-05,
"loss": 1.317,
"step": 18080
},
{
"epoch": 13.52,
"learning_rate": 3.051818634778276e-05,
"loss": 1.2849,
"step": 18090
},
{
"epoch": 13.52,
"learning_rate": 3.05043459004595e-05,
"loss": 1.2914,
"step": 18100
},
{
"epoch": 13.53,
"learning_rate": 3.049050545313625e-05,
"loss": 1.3478,
"step": 18110
},
{
"epoch": 13.54,
"learning_rate": 3.0476665005812987e-05,
"loss": 1.3256,
"step": 18120
},
{
"epoch": 13.55,
"learning_rate": 3.0462824558489732e-05,
"loss": 1.3004,
"step": 18130
},
{
"epoch": 13.55,
"learning_rate": 3.0448984111166473e-05,
"loss": 1.299,
"step": 18140
},
{
"epoch": 13.56,
"learning_rate": 3.0435143663843218e-05,
"loss": 1.33,
"step": 18150
},
{
"epoch": 13.57,
"learning_rate": 3.042130321651996e-05,
"loss": 1.3695,
"step": 18160
},
{
"epoch": 13.57,
"learning_rate": 3.0407462769196704e-05,
"loss": 1.3,
"step": 18170
},
{
"epoch": 13.58,
"learning_rate": 3.0393622321873445e-05,
"loss": 1.3208,
"step": 18180
},
{
"epoch": 13.59,
"learning_rate": 3.037978187455019e-05,
"loss": 1.3129,
"step": 18190
},
{
"epoch": 13.6,
"learning_rate": 3.036594142722693e-05,
"loss": 1.3256,
"step": 18200
},
{
"epoch": 13.6,
"learning_rate": 3.035210097990367e-05,
"loss": 1.3458,
"step": 18210
},
{
"epoch": 13.61,
"learning_rate": 3.0338260532580413e-05,
"loss": 1.3108,
"step": 18220
},
{
"epoch": 13.62,
"learning_rate": 3.0324420085257155e-05,
"loss": 1.3177,
"step": 18230
},
{
"epoch": 13.63,
"learning_rate": 3.03105796379339e-05,
"loss": 1.2957,
"step": 18240
},
{
"epoch": 13.63,
"learning_rate": 3.029673919061064e-05,
"loss": 1.2519,
"step": 18250
},
{
"epoch": 13.64,
"learning_rate": 3.0282898743287385e-05,
"loss": 1.3077,
"step": 18260
},
{
"epoch": 13.65,
"learning_rate": 3.0269058295964126e-05,
"loss": 1.3277,
"step": 18270
},
{
"epoch": 13.66,
"learning_rate": 3.025521784864087e-05,
"loss": 1.3405,
"step": 18280
},
{
"epoch": 13.66,
"learning_rate": 3.0241377401317612e-05,
"loss": 1.3016,
"step": 18290
},
{
"epoch": 13.67,
"learning_rate": 3.0227536953994357e-05,
"loss": 1.3186,
"step": 18300
},
{
"epoch": 13.68,
"learning_rate": 3.0213696506671095e-05,
"loss": 1.3132,
"step": 18310
},
{
"epoch": 13.69,
"learning_rate": 3.0199856059347843e-05,
"loss": 1.3238,
"step": 18320
},
{
"epoch": 13.69,
"learning_rate": 3.018601561202458e-05,
"loss": 1.3328,
"step": 18330
},
{
"epoch": 13.7,
"learning_rate": 3.017217516470133e-05,
"loss": 1.2811,
"step": 18340
},
{
"epoch": 13.71,
"learning_rate": 3.0158334717378067e-05,
"loss": 1.325,
"step": 18350
},
{
"epoch": 13.72,
"learning_rate": 3.0144494270054808e-05,
"loss": 1.3178,
"step": 18360
},
{
"epoch": 13.72,
"learning_rate": 3.0130653822731552e-05,
"loss": 1.3463,
"step": 18370
},
{
"epoch": 13.73,
"learning_rate": 3.0116813375408294e-05,
"loss": 1.3198,
"step": 18380
},
{
"epoch": 13.74,
"learning_rate": 3.010297292808504e-05,
"loss": 1.339,
"step": 18390
},
{
"epoch": 13.75,
"learning_rate": 3.008913248076178e-05,
"loss": 1.3328,
"step": 18400
},
{
"epoch": 13.75,
"learning_rate": 3.0075292033438524e-05,
"loss": 1.3243,
"step": 18410
},
{
"epoch": 13.76,
"learning_rate": 3.0061451586115262e-05,
"loss": 1.3072,
"step": 18420
},
{
"epoch": 13.77,
"learning_rate": 3.004761113879201e-05,
"loss": 1.3555,
"step": 18430
},
{
"epoch": 13.78,
"learning_rate": 3.0033770691468748e-05,
"loss": 1.322,
"step": 18440
},
{
"epoch": 13.78,
"learning_rate": 3.0019930244145496e-05,
"loss": 1.3404,
"step": 18450
},
{
"epoch": 13.79,
"learning_rate": 3.0006089796822234e-05,
"loss": 1.3296,
"step": 18460
},
{
"epoch": 13.8,
"learning_rate": 2.999224934949898e-05,
"loss": 1.3284,
"step": 18470
},
{
"epoch": 13.81,
"learning_rate": 2.997840890217572e-05,
"loss": 1.3056,
"step": 18480
},
{
"epoch": 13.81,
"learning_rate": 2.996456845485246e-05,
"loss": 1.3042,
"step": 18490
},
{
"epoch": 13.82,
"learning_rate": 2.9950728007529206e-05,
"loss": 1.3154,
"step": 18500
},
{
"epoch": 13.83,
"learning_rate": 2.9936887560205944e-05,
"loss": 1.3566,
"step": 18510
},
{
"epoch": 13.84,
"learning_rate": 2.992304711288269e-05,
"loss": 1.3215,
"step": 18520
},
{
"epoch": 13.84,
"learning_rate": 2.990920666555943e-05,
"loss": 1.3326,
"step": 18530
},
{
"epoch": 13.85,
"learning_rate": 2.9895366218236178e-05,
"loss": 1.3311,
"step": 18540
},
{
"epoch": 13.86,
"learning_rate": 2.9881525770912915e-05,
"loss": 1.3332,
"step": 18550
},
{
"epoch": 13.87,
"learning_rate": 2.986768532358966e-05,
"loss": 1.3155,
"step": 18560
},
{
"epoch": 13.87,
"learning_rate": 2.98538448762664e-05,
"loss": 1.3197,
"step": 18570
},
{
"epoch": 13.88,
"learning_rate": 2.9840004428943146e-05,
"loss": 1.3234,
"step": 18580
},
{
"epoch": 13.89,
"learning_rate": 2.9826163981619887e-05,
"loss": 1.3578,
"step": 18590
},
{
"epoch": 13.9,
"learning_rate": 2.9812323534296632e-05,
"loss": 1.3107,
"step": 18600
},
{
"epoch": 13.9,
"learning_rate": 2.9798483086973373e-05,
"loss": 1.3117,
"step": 18610
},
{
"epoch": 13.91,
"learning_rate": 2.978464263965011e-05,
"loss": 1.3373,
"step": 18620
},
{
"epoch": 13.92,
"learning_rate": 2.977080219232686e-05,
"loss": 1.3197,
"step": 18630
},
{
"epoch": 13.93,
"learning_rate": 2.9756961745003597e-05,
"loss": 1.3142,
"step": 18640
},
{
"epoch": 13.93,
"learning_rate": 2.974312129768034e-05,
"loss": 1.3453,
"step": 18650
},
{
"epoch": 13.94,
"learning_rate": 2.9729280850357083e-05,
"loss": 1.3348,
"step": 18660
},
{
"epoch": 13.95,
"learning_rate": 2.9715440403033827e-05,
"loss": 1.3699,
"step": 18670
},
{
"epoch": 13.96,
"learning_rate": 2.970159995571057e-05,
"loss": 1.3337,
"step": 18680
},
{
"epoch": 13.96,
"learning_rate": 2.9687759508387313e-05,
"loss": 1.3268,
"step": 18690
},
{
"epoch": 13.97,
"learning_rate": 2.9673919061064055e-05,
"loss": 1.3503,
"step": 18700
},
{
"epoch": 13.98,
"learning_rate": 2.96600786137408e-05,
"loss": 1.3453,
"step": 18710
},
{
"epoch": 13.99,
"learning_rate": 2.964623816641754e-05,
"loss": 1.3201,
"step": 18720
},
{
"epoch": 13.99,
"learning_rate": 2.9632397719094285e-05,
"loss": 1.3439,
"step": 18730
},
{
"epoch": 14.0,
"eval_accuracy": 0.5523399338200535,
"eval_loss": 1.0509512424468994,
"eval_runtime": 252.1883,
"eval_samples_per_second": 75.495,
"eval_steps_per_second": 2.359,
"step": 18739
},
{
"epoch": 14.0,
"learning_rate": 2.9618557271771026e-05,
"loss": 1.2762,
"step": 18740
},
{
"epoch": 14.01,
"learning_rate": 2.960471682444777e-05,
"loss": 1.3122,
"step": 18750
},
{
"epoch": 14.02,
"learning_rate": 2.959087637712451e-05,
"loss": 1.301,
"step": 18760
},
{
"epoch": 14.02,
"learning_rate": 2.957703592980125e-05,
"loss": 1.3217,
"step": 18770
},
{
"epoch": 14.03,
"learning_rate": 2.9563195482477995e-05,
"loss": 1.3186,
"step": 18780
},
{
"epoch": 14.04,
"learning_rate": 2.9549355035154736e-05,
"loss": 1.2862,
"step": 18790
},
{
"epoch": 14.05,
"learning_rate": 2.953551458783148e-05,
"loss": 1.3176,
"step": 18800
},
{
"epoch": 14.05,
"learning_rate": 2.9521674140508222e-05,
"loss": 1.351,
"step": 18810
},
{
"epoch": 14.06,
"learning_rate": 2.9507833693184967e-05,
"loss": 1.3182,
"step": 18820
},
{
"epoch": 14.07,
"learning_rate": 2.9493993245861708e-05,
"loss": 1.3107,
"step": 18830
},
{
"epoch": 14.08,
"learning_rate": 2.9480152798538452e-05,
"loss": 1.2923,
"step": 18840
},
{
"epoch": 14.08,
"learning_rate": 2.946631235121519e-05,
"loss": 1.3299,
"step": 18850
},
{
"epoch": 14.09,
"learning_rate": 2.9452471903891938e-05,
"loss": 1.3073,
"step": 18860
},
{
"epoch": 14.1,
"learning_rate": 2.9438631456568676e-05,
"loss": 1.2933,
"step": 18870
},
{
"epoch": 14.11,
"learning_rate": 2.9424791009245424e-05,
"loss": 1.2612,
"step": 18880
},
{
"epoch": 14.11,
"learning_rate": 2.9410950561922162e-05,
"loss": 1.3018,
"step": 18890
},
{
"epoch": 14.12,
"learning_rate": 2.9397110114598903e-05,
"loss": 1.315,
"step": 18900
},
{
"epoch": 14.13,
"learning_rate": 2.9383269667275648e-05,
"loss": 1.3015,
"step": 18910
},
{
"epoch": 14.14,
"learning_rate": 2.936942921995239e-05,
"loss": 1.3045,
"step": 18920
},
{
"epoch": 14.14,
"learning_rate": 2.9355588772629134e-05,
"loss": 1.2953,
"step": 18930
},
{
"epoch": 14.15,
"learning_rate": 2.9341748325305872e-05,
"loss": 1.2892,
"step": 18940
},
{
"epoch": 14.16,
"learning_rate": 2.932790787798262e-05,
"loss": 1.3039,
"step": 18950
},
{
"epoch": 14.17,
"learning_rate": 2.9314067430659358e-05,
"loss": 1.3079,
"step": 18960
},
{
"epoch": 14.17,
"learning_rate": 2.9300226983336106e-05,
"loss": 1.328,
"step": 18970
},
{
"epoch": 14.18,
"learning_rate": 2.9286386536012844e-05,
"loss": 1.3108,
"step": 18980
},
{
"epoch": 14.19,
"learning_rate": 2.9272546088689588e-05,
"loss": 1.3394,
"step": 18990
},
{
"epoch": 14.19,
"learning_rate": 2.925870564136633e-05,
"loss": 1.333,
"step": 19000
},
{
"epoch": 14.2,
"learning_rate": 2.9244865194043074e-05,
"loss": 1.3207,
"step": 19010
},
{
"epoch": 14.21,
"learning_rate": 2.9231024746719815e-05,
"loss": 1.2658,
"step": 19020
},
{
"epoch": 14.22,
"learning_rate": 2.921718429939656e-05,
"loss": 1.2944,
"step": 19030
},
{
"epoch": 14.22,
"learning_rate": 2.92033438520733e-05,
"loss": 1.3032,
"step": 19040
},
{
"epoch": 14.23,
"learning_rate": 2.918950340475004e-05,
"loss": 1.2944,
"step": 19050
},
{
"epoch": 14.24,
"learning_rate": 2.9175662957426787e-05,
"loss": 1.3382,
"step": 19060
},
{
"epoch": 14.25,
"learning_rate": 2.9161822510103525e-05,
"loss": 1.3043,
"step": 19070
},
{
"epoch": 14.25,
"learning_rate": 2.9147982062780273e-05,
"loss": 1.2914,
"step": 19080
},
{
"epoch": 14.26,
"learning_rate": 2.913414161545701e-05,
"loss": 1.3205,
"step": 19090
},
{
"epoch": 14.27,
"learning_rate": 2.9120301168133756e-05,
"loss": 1.3353,
"step": 19100
},
{
"epoch": 14.28,
"learning_rate": 2.9106460720810497e-05,
"loss": 1.3082,
"step": 19110
},
{
"epoch": 14.28,
"learning_rate": 2.909262027348724e-05,
"loss": 1.3287,
"step": 19120
},
{
"epoch": 14.29,
"learning_rate": 2.9078779826163983e-05,
"loss": 1.2765,
"step": 19130
},
{
"epoch": 14.3,
"learning_rate": 2.9064939378840727e-05,
"loss": 1.2999,
"step": 19140
},
{
"epoch": 14.31,
"learning_rate": 2.905109893151747e-05,
"loss": 1.281,
"step": 19150
},
{
"epoch": 14.31,
"learning_rate": 2.9037258484194213e-05,
"loss": 1.33,
"step": 19160
},
{
"epoch": 14.32,
"learning_rate": 2.9023418036870954e-05,
"loss": 1.3224,
"step": 19170
},
{
"epoch": 14.33,
"learning_rate": 2.9009577589547692e-05,
"loss": 1.3117,
"step": 19180
},
{
"epoch": 14.34,
"learning_rate": 2.8995737142224437e-05,
"loss": 1.3138,
"step": 19190
},
{
"epoch": 14.34,
"learning_rate": 2.8981896694901178e-05,
"loss": 1.3177,
"step": 19200
},
{
"epoch": 14.35,
"learning_rate": 2.8968056247577923e-05,
"loss": 1.3073,
"step": 19210
},
{
"epoch": 14.36,
"learning_rate": 2.8954215800254664e-05,
"loss": 1.2876,
"step": 19220
},
{
"epoch": 14.37,
"learning_rate": 2.894037535293141e-05,
"loss": 1.3206,
"step": 19230
},
{
"epoch": 14.37,
"learning_rate": 2.892653490560815e-05,
"loss": 1.3525,
"step": 19240
},
{
"epoch": 14.38,
"learning_rate": 2.8912694458284895e-05,
"loss": 1.3185,
"step": 19250
},
{
"epoch": 14.39,
"learning_rate": 2.8898854010961636e-05,
"loss": 1.3296,
"step": 19260
},
{
"epoch": 14.4,
"learning_rate": 2.888501356363838e-05,
"loss": 1.295,
"step": 19270
},
{
"epoch": 14.4,
"learning_rate": 2.887117311631512e-05,
"loss": 1.3045,
"step": 19280
},
{
"epoch": 14.41,
"learning_rate": 2.8857332668991866e-05,
"loss": 1.3325,
"step": 19290
},
{
"epoch": 14.42,
"learning_rate": 2.8843492221668604e-05,
"loss": 1.3046,
"step": 19300
},
{
"epoch": 14.43,
"learning_rate": 2.8829651774345352e-05,
"loss": 1.3166,
"step": 19310
},
{
"epoch": 14.43,
"learning_rate": 2.881581132702209e-05,
"loss": 1.295,
"step": 19320
},
{
"epoch": 14.44,
"learning_rate": 2.880197087969883e-05,
"loss": 1.318,
"step": 19330
},
{
"epoch": 14.45,
"learning_rate": 2.8788130432375576e-05,
"loss": 1.3255,
"step": 19340
},
{
"epoch": 14.46,
"learning_rate": 2.8774289985052317e-05,
"loss": 1.2826,
"step": 19350
},
{
"epoch": 14.46,
"learning_rate": 2.8760449537729062e-05,
"loss": 1.2853,
"step": 19360
},
{
"epoch": 14.47,
"learning_rate": 2.8746609090405803e-05,
"loss": 1.3027,
"step": 19370
},
{
"epoch": 14.48,
"learning_rate": 2.8732768643082548e-05,
"loss": 1.3405,
"step": 19380
},
{
"epoch": 14.49,
"learning_rate": 2.8718928195759286e-05,
"loss": 1.3245,
"step": 19390
},
{
"epoch": 14.49,
"learning_rate": 2.8705087748436034e-05,
"loss": 1.2999,
"step": 19400
},
{
"epoch": 14.5,
"learning_rate": 2.869124730111277e-05,
"loss": 1.3404,
"step": 19410
},
{
"epoch": 14.51,
"learning_rate": 2.8677406853789516e-05,
"loss": 1.3169,
"step": 19420
},
{
"epoch": 14.52,
"learning_rate": 2.8663566406466258e-05,
"loss": 1.2908,
"step": 19430
},
{
"epoch": 14.52,
"learning_rate": 2.8649725959143002e-05,
"loss": 1.3262,
"step": 19440
},
{
"epoch": 14.53,
"learning_rate": 2.8635885511819743e-05,
"loss": 1.323,
"step": 19450
},
{
"epoch": 14.54,
"learning_rate": 2.8622045064496485e-05,
"loss": 1.2787,
"step": 19460
},
{
"epoch": 14.55,
"learning_rate": 2.860820461717323e-05,
"loss": 1.2807,
"step": 19470
},
{
"epoch": 14.55,
"learning_rate": 2.8594364169849967e-05,
"loss": 1.3088,
"step": 19480
},
{
"epoch": 14.56,
"learning_rate": 2.8580523722526715e-05,
"loss": 1.3234,
"step": 19490
},
{
"epoch": 14.57,
"learning_rate": 2.8566683275203453e-05,
"loss": 1.2843,
"step": 19500
},
{
"epoch": 14.58,
"learning_rate": 2.85528428278802e-05,
"loss": 1.3407,
"step": 19510
},
{
"epoch": 14.58,
"learning_rate": 2.853900238055694e-05,
"loss": 1.2892,
"step": 19520
},
{
"epoch": 14.59,
"learning_rate": 2.8525161933233684e-05,
"loss": 1.334,
"step": 19530
},
{
"epoch": 14.6,
"learning_rate": 2.8511321485910425e-05,
"loss": 1.2977,
"step": 19540
},
{
"epoch": 14.61,
"learning_rate": 2.849748103858717e-05,
"loss": 1.3486,
"step": 19550
},
{
"epoch": 14.61,
"learning_rate": 2.848364059126391e-05,
"loss": 1.2967,
"step": 19560
},
{
"epoch": 14.62,
"learning_rate": 2.8469800143940655e-05,
"loss": 1.3279,
"step": 19570
},
{
"epoch": 14.63,
"learning_rate": 2.8455959696617397e-05,
"loss": 1.279,
"step": 19580
},
{
"epoch": 14.64,
"learning_rate": 2.844211924929414e-05,
"loss": 1.3168,
"step": 19590
},
{
"epoch": 14.64,
"learning_rate": 2.8428278801970883e-05,
"loss": 1.3059,
"step": 19600
},
{
"epoch": 14.65,
"learning_rate": 2.841443835464762e-05,
"loss": 1.3045,
"step": 19610
},
{
"epoch": 14.66,
"learning_rate": 2.8400597907324365e-05,
"loss": 1.2907,
"step": 19620
},
{
"epoch": 14.67,
"learning_rate": 2.8386757460001106e-05,
"loss": 1.3652,
"step": 19630
},
{
"epoch": 14.67,
"learning_rate": 2.837291701267785e-05,
"loss": 1.297,
"step": 19640
},
{
"epoch": 14.68,
"learning_rate": 2.8359076565354592e-05,
"loss": 1.3143,
"step": 19650
},
{
"epoch": 14.69,
"learning_rate": 2.8345236118031337e-05,
"loss": 1.3401,
"step": 19660
},
{
"epoch": 14.7,
"learning_rate": 2.8331395670708078e-05,
"loss": 1.321,
"step": 19670
},
{
"epoch": 14.7,
"learning_rate": 2.8317555223384823e-05,
"loss": 1.2787,
"step": 19680
},
{
"epoch": 14.71,
"learning_rate": 2.8303714776061564e-05,
"loss": 1.3098,
"step": 19690
},
{
"epoch": 14.72,
"learning_rate": 2.828987432873831e-05,
"loss": 1.3116,
"step": 19700
},
{
"epoch": 14.73,
"learning_rate": 2.827603388141505e-05,
"loss": 1.3011,
"step": 19710
},
{
"epoch": 14.73,
"learning_rate": 2.8262193434091795e-05,
"loss": 1.3251,
"step": 19720
},
{
"epoch": 14.74,
"learning_rate": 2.8248352986768532e-05,
"loss": 1.3432,
"step": 19730
},
{
"epoch": 14.75,
"learning_rate": 2.8234512539445274e-05,
"loss": 1.2794,
"step": 19740
},
{
"epoch": 14.76,
"learning_rate": 2.822067209212202e-05,
"loss": 1.306,
"step": 19750
},
{
"epoch": 14.76,
"learning_rate": 2.820683164479876e-05,
"loss": 1.2977,
"step": 19760
},
{
"epoch": 14.77,
"learning_rate": 2.8192991197475504e-05,
"loss": 1.3047,
"step": 19770
},
{
"epoch": 14.78,
"learning_rate": 2.8179150750152245e-05,
"loss": 1.3211,
"step": 19780
},
{
"epoch": 14.79,
"learning_rate": 2.816531030282899e-05,
"loss": 1.2861,
"step": 19790
},
{
"epoch": 14.79,
"learning_rate": 2.815146985550573e-05,
"loss": 1.2856,
"step": 19800
},
{
"epoch": 14.8,
"learning_rate": 2.8137629408182476e-05,
"loss": 1.3072,
"step": 19810
},
{
"epoch": 14.81,
"learning_rate": 2.8123788960859214e-05,
"loss": 1.3423,
"step": 19820
},
{
"epoch": 14.82,
"learning_rate": 2.8109948513535962e-05,
"loss": 1.3192,
"step": 19830
},
{
"epoch": 14.82,
"learning_rate": 2.80961080662127e-05,
"loss": 1.3451,
"step": 19840
},
{
"epoch": 14.83,
"learning_rate": 2.8082267618889448e-05,
"loss": 1.311,
"step": 19850
},
{
"epoch": 14.84,
"learning_rate": 2.8068427171566186e-05,
"loss": 1.3185,
"step": 19860
},
{
"epoch": 14.84,
"learning_rate": 2.8054586724242927e-05,
"loss": 1.3046,
"step": 19870
},
{
"epoch": 14.85,
"learning_rate": 2.804074627691967e-05,
"loss": 1.2974,
"step": 19880
},
{
"epoch": 14.86,
"learning_rate": 2.8026905829596413e-05,
"loss": 1.3238,
"step": 19890
},
{
"epoch": 14.87,
"learning_rate": 2.8013065382273157e-05,
"loss": 1.3143,
"step": 19900
},
{
"epoch": 14.87,
"learning_rate": 2.7999224934949895e-05,
"loss": 1.3484,
"step": 19910
},
{
"epoch": 14.88,
"learning_rate": 2.7985384487626643e-05,
"loss": 1.3167,
"step": 19920
},
{
"epoch": 14.89,
"learning_rate": 2.797154404030338e-05,
"loss": 1.3626,
"step": 19930
},
{
"epoch": 14.9,
"learning_rate": 2.795770359298013e-05,
"loss": 1.3458,
"step": 19940
},
{
"epoch": 14.9,
"learning_rate": 2.7943863145656867e-05,
"loss": 1.3323,
"step": 19950
},
{
"epoch": 14.91,
"learning_rate": 2.7930022698333612e-05,
"loss": 1.3331,
"step": 19960
},
{
"epoch": 14.92,
"learning_rate": 2.7916182251010353e-05,
"loss": 1.3093,
"step": 19970
},
{
"epoch": 14.93,
"learning_rate": 2.7902341803687098e-05,
"loss": 1.2626,
"step": 19980
},
{
"epoch": 14.93,
"learning_rate": 2.788850135636384e-05,
"loss": 1.2979,
"step": 19990
},
{
"epoch": 14.94,
"learning_rate": 2.7874660909040584e-05,
"loss": 1.314,
"step": 20000
},
{
"epoch": 14.95,
"learning_rate": 2.7860820461717325e-05,
"loss": 1.3274,
"step": 20010
},
{
"epoch": 14.96,
"learning_rate": 2.7846980014394063e-05,
"loss": 1.3298,
"step": 20020
},
{
"epoch": 14.96,
"learning_rate": 2.783313956707081e-05,
"loss": 1.2945,
"step": 20030
},
{
"epoch": 14.97,
"learning_rate": 2.781929911974755e-05,
"loss": 1.3127,
"step": 20040
},
{
"epoch": 14.98,
"learning_rate": 2.7805458672424297e-05,
"loss": 1.2996,
"step": 20050
},
{
"epoch": 14.99,
"learning_rate": 2.7791618225101034e-05,
"loss": 1.2938,
"step": 20060
},
{
"epoch": 14.99,
"learning_rate": 2.777777777777778e-05,
"loss": 1.3371,
"step": 20070
},
{
"epoch": 15.0,
"eval_accuracy": 0.579494721361416,
"eval_loss": 0.9819969534873962,
"eval_runtime": 233.8037,
"eval_samples_per_second": 81.432,
"eval_steps_per_second": 2.545,
"step": 20077
},
{
"epoch": 15.0,
"learning_rate": 2.776393733045452e-05,
"loss": 1.3029,
"step": 20080
},
{
"epoch": 15.01,
"learning_rate": 2.7750096883131265e-05,
"loss": 1.3241,
"step": 20090
},
{
"epoch": 15.02,
"learning_rate": 2.7736256435808006e-05,
"loss": 1.2938,
"step": 20100
},
{
"epoch": 15.02,
"learning_rate": 2.772241598848475e-05,
"loss": 1.3097,
"step": 20110
},
{
"epoch": 15.03,
"learning_rate": 2.7708575541161492e-05,
"loss": 1.2586,
"step": 20120
},
{
"epoch": 15.04,
"learning_rate": 2.7694735093838237e-05,
"loss": 1.2748,
"step": 20130
},
{
"epoch": 15.05,
"learning_rate": 2.7680894646514978e-05,
"loss": 1.2857,
"step": 20140
},
{
"epoch": 15.05,
"learning_rate": 2.7667054199191716e-05,
"loss": 1.2635,
"step": 20150
},
{
"epoch": 15.06,
"learning_rate": 2.765321375186846e-05,
"loss": 1.3154,
"step": 20160
},
{
"epoch": 15.07,
"learning_rate": 2.7639373304545202e-05,
"loss": 1.3188,
"step": 20170
},
{
"epoch": 15.08,
"learning_rate": 2.7625532857221946e-05,
"loss": 1.3626,
"step": 20180
},
{
"epoch": 15.08,
"learning_rate": 2.7611692409898688e-05,
"loss": 1.2923,
"step": 20190
},
{
"epoch": 15.09,
"learning_rate": 2.7597851962575432e-05,
"loss": 1.3441,
"step": 20200
},
{
"epoch": 15.1,
"learning_rate": 2.7584011515252174e-05,
"loss": 1.3414,
"step": 20210
},
{
"epoch": 15.11,
"learning_rate": 2.7570171067928918e-05,
"loss": 1.3067,
"step": 20220
},
{
"epoch": 15.11,
"learning_rate": 2.755633062060566e-05,
"loss": 1.3259,
"step": 20230
},
{
"epoch": 15.12,
"learning_rate": 2.7542490173282404e-05,
"loss": 1.3072,
"step": 20240
},
{
"epoch": 15.13,
"learning_rate": 2.7528649725959142e-05,
"loss": 1.3236,
"step": 20250
},
{
"epoch": 15.14,
"learning_rate": 2.751480927863589e-05,
"loss": 1.2672,
"step": 20260
},
{
"epoch": 15.14,
"learning_rate": 2.7500968831312628e-05,
"loss": 1.3066,
"step": 20270
},
{
"epoch": 15.15,
"learning_rate": 2.7487128383989376e-05,
"loss": 1.3072,
"step": 20280
},
{
"epoch": 15.16,
"learning_rate": 2.7473287936666114e-05,
"loss": 1.3259,
"step": 20290
},
{
"epoch": 15.17,
"learning_rate": 2.7459447489342855e-05,
"loss": 1.2773,
"step": 20300
},
{
"epoch": 15.17,
"learning_rate": 2.74456070420196e-05,
"loss": 1.3068,
"step": 20310
},
{
"epoch": 15.18,
"learning_rate": 2.743176659469634e-05,
"loss": 1.3085,
"step": 20320
},
{
"epoch": 15.19,
"learning_rate": 2.7417926147373086e-05,
"loss": 1.2931,
"step": 20330
},
{
"epoch": 15.2,
"learning_rate": 2.7404085700049827e-05,
"loss": 1.3239,
"step": 20340
},
{
"epoch": 15.2,
"learning_rate": 2.739024525272657e-05,
"loss": 1.2873,
"step": 20350
},
{
"epoch": 15.21,
"learning_rate": 2.737640480540331e-05,
"loss": 1.3474,
"step": 20360
},
{
"epoch": 15.22,
"learning_rate": 2.7362564358080057e-05,
"loss": 1.3072,
"step": 20370
},
{
"epoch": 15.23,
"learning_rate": 2.7348723910756795e-05,
"loss": 1.2801,
"step": 20380
},
{
"epoch": 15.23,
"learning_rate": 2.733488346343354e-05,
"loss": 1.3209,
"step": 20390
},
{
"epoch": 15.24,
"learning_rate": 2.732104301611028e-05,
"loss": 1.3102,
"step": 20400
},
{
"epoch": 15.25,
"learning_rate": 2.7307202568787026e-05,
"loss": 1.3272,
"step": 20410
},
{
"epoch": 15.26,
"learning_rate": 2.7293362121463767e-05,
"loss": 1.3256,
"step": 20420
},
{
"epoch": 15.26,
"learning_rate": 2.7279521674140508e-05,
"loss": 1.3298,
"step": 20430
},
{
"epoch": 15.27,
"learning_rate": 2.7265681226817253e-05,
"loss": 1.3611,
"step": 20440
},
{
"epoch": 15.28,
"learning_rate": 2.725184077949399e-05,
"loss": 1.3009,
"step": 20450
},
{
"epoch": 15.29,
"learning_rate": 2.723800033217074e-05,
"loss": 1.316,
"step": 20460
},
{
"epoch": 15.29,
"learning_rate": 2.7224159884847477e-05,
"loss": 1.2443,
"step": 20470
},
{
"epoch": 15.3,
"learning_rate": 2.7210319437524225e-05,
"loss": 1.3147,
"step": 20480
},
{
"epoch": 15.31,
"learning_rate": 2.7196478990200963e-05,
"loss": 1.2814,
"step": 20490
},
{
"epoch": 15.32,
"learning_rate": 2.7182638542877707e-05,
"loss": 1.3216,
"step": 20500
},
{
"epoch": 15.32,
"learning_rate": 2.716879809555445e-05,
"loss": 1.3165,
"step": 20510
},
{
"epoch": 15.33,
"learning_rate": 2.7154957648231193e-05,
"loss": 1.2864,
"step": 20520
},
{
"epoch": 15.34,
"learning_rate": 2.7141117200907934e-05,
"loss": 1.299,
"step": 20530
},
{
"epoch": 15.35,
"learning_rate": 2.712727675358468e-05,
"loss": 1.2805,
"step": 20540
},
{
"epoch": 15.35,
"learning_rate": 2.711343630626142e-05,
"loss": 1.2715,
"step": 20550
},
{
"epoch": 15.36,
"learning_rate": 2.7099595858938165e-05,
"loss": 1.2595,
"step": 20560
},
{
"epoch": 15.37,
"learning_rate": 2.7085755411614906e-05,
"loss": 1.2848,
"step": 20570
},
{
"epoch": 15.38,
"learning_rate": 2.7071914964291644e-05,
"loss": 1.3279,
"step": 20580
},
{
"epoch": 15.38,
"learning_rate": 2.705807451696839e-05,
"loss": 1.3118,
"step": 20590
},
{
"epoch": 15.39,
"learning_rate": 2.704423406964513e-05,
"loss": 1.3222,
"step": 20600
},
{
"epoch": 15.4,
"learning_rate": 2.7030393622321875e-05,
"loss": 1.3153,
"step": 20610
},
{
"epoch": 15.41,
"learning_rate": 2.7016553174998616e-05,
"loss": 1.2874,
"step": 20620
},
{
"epoch": 15.41,
"learning_rate": 2.700271272767536e-05,
"loss": 1.3238,
"step": 20630
},
{
"epoch": 15.42,
"learning_rate": 2.6988872280352102e-05,
"loss": 1.2848,
"step": 20640
},
{
"epoch": 15.43,
"learning_rate": 2.6975031833028846e-05,
"loss": 1.3213,
"step": 20650
},
{
"epoch": 15.44,
"learning_rate": 2.6961191385705588e-05,
"loss": 1.2916,
"step": 20660
},
{
"epoch": 15.44,
"learning_rate": 2.6947350938382332e-05,
"loss": 1.2418,
"step": 20670
},
{
"epoch": 15.45,
"learning_rate": 2.6933510491059074e-05,
"loss": 1.3343,
"step": 20680
},
{
"epoch": 15.46,
"learning_rate": 2.6919670043735818e-05,
"loss": 1.3072,
"step": 20690
},
{
"epoch": 15.47,
"learning_rate": 2.6905829596412556e-05,
"loss": 1.2876,
"step": 20700
},
{
"epoch": 15.47,
"learning_rate": 2.6891989149089297e-05,
"loss": 1.2926,
"step": 20710
},
{
"epoch": 15.48,
"learning_rate": 2.6878148701766042e-05,
"loss": 1.3145,
"step": 20720
},
{
"epoch": 15.49,
"learning_rate": 2.6864308254442783e-05,
"loss": 1.3204,
"step": 20730
},
{
"epoch": 15.49,
"learning_rate": 2.6850467807119528e-05,
"loss": 1.2851,
"step": 20740
},
{
"epoch": 15.5,
"learning_rate": 2.683662735979627e-05,
"loss": 1.2883,
"step": 20750
},
{
"epoch": 15.51,
"learning_rate": 2.6822786912473014e-05,
"loss": 1.304,
"step": 20760
},
{
"epoch": 15.52,
"learning_rate": 2.6808946465149755e-05,
"loss": 1.3245,
"step": 20770
},
{
"epoch": 15.52,
"learning_rate": 2.67951060178265e-05,
"loss": 1.2917,
"step": 20780
},
{
"epoch": 15.53,
"learning_rate": 2.6781265570503237e-05,
"loss": 1.3116,
"step": 20790
},
{
"epoch": 15.54,
"learning_rate": 2.6767425123179986e-05,
"loss": 1.29,
"step": 20800
},
{
"epoch": 15.55,
"learning_rate": 2.6753584675856723e-05,
"loss": 1.3335,
"step": 20810
},
{
"epoch": 15.55,
"learning_rate": 2.673974422853347e-05,
"loss": 1.2884,
"step": 20820
},
{
"epoch": 15.56,
"learning_rate": 2.672590378121021e-05,
"loss": 1.3486,
"step": 20830
},
{
"epoch": 15.57,
"learning_rate": 2.6712063333886954e-05,
"loss": 1.2992,
"step": 20840
},
{
"epoch": 15.58,
"learning_rate": 2.6698222886563695e-05,
"loss": 1.2985,
"step": 20850
},
{
"epoch": 15.58,
"learning_rate": 2.6684382439240436e-05,
"loss": 1.3149,
"step": 20860
},
{
"epoch": 15.59,
"learning_rate": 2.667054199191718e-05,
"loss": 1.3027,
"step": 20870
},
{
"epoch": 15.6,
"learning_rate": 2.665670154459392e-05,
"loss": 1.3261,
"step": 20880
},
{
"epoch": 15.61,
"learning_rate": 2.6642861097270667e-05,
"loss": 1.295,
"step": 20890
},
{
"epoch": 15.61,
"learning_rate": 2.6629020649947405e-05,
"loss": 1.3562,
"step": 20900
},
{
"epoch": 15.62,
"learning_rate": 2.6615180202624153e-05,
"loss": 1.2658,
"step": 20910
},
{
"epoch": 15.63,
"learning_rate": 2.660133975530089e-05,
"loss": 1.3441,
"step": 20920
},
{
"epoch": 15.64,
"learning_rate": 2.6587499307977635e-05,
"loss": 1.3042,
"step": 20930
},
{
"epoch": 15.64,
"learning_rate": 2.6573658860654377e-05,
"loss": 1.2905,
"step": 20940
},
{
"epoch": 15.65,
"learning_rate": 2.655981841333112e-05,
"loss": 1.2781,
"step": 20950
},
{
"epoch": 15.66,
"learning_rate": 2.6545977966007863e-05,
"loss": 1.2914,
"step": 20960
},
{
"epoch": 15.67,
"learning_rate": 2.6532137518684607e-05,
"loss": 1.3265,
"step": 20970
},
{
"epoch": 15.67,
"learning_rate": 2.651829707136135e-05,
"loss": 1.3374,
"step": 20980
},
{
"epoch": 15.68,
"learning_rate": 2.6504456624038086e-05,
"loss": 1.3094,
"step": 20990
},
{
"epoch": 15.69,
"learning_rate": 2.6490616176714834e-05,
"loss": 1.2563,
"step": 21000
},
{
"epoch": 15.7,
"learning_rate": 2.6476775729391572e-05,
"loss": 1.3448,
"step": 21010
},
{
"epoch": 15.7,
"learning_rate": 2.6462935282068317e-05,
"loss": 1.3008,
"step": 21020
},
{
"epoch": 15.71,
"learning_rate": 2.6449094834745058e-05,
"loss": 1.2828,
"step": 21030
},
{
"epoch": 15.72,
"learning_rate": 2.6435254387421803e-05,
"loss": 1.2903,
"step": 21040
},
{
"epoch": 15.73,
"learning_rate": 2.6421413940098544e-05,
"loss": 1.2728,
"step": 21050
},
{
"epoch": 15.73,
"learning_rate": 2.640757349277529e-05,
"loss": 1.3213,
"step": 21060
},
{
"epoch": 15.74,
"learning_rate": 2.639373304545203e-05,
"loss": 1.3335,
"step": 21070
},
{
"epoch": 15.75,
"learning_rate": 2.6379892598128775e-05,
"loss": 1.3151,
"step": 21080
},
{
"epoch": 15.76,
"learning_rate": 2.6366052150805516e-05,
"loss": 1.2964,
"step": 21090
},
{
"epoch": 15.76,
"learning_rate": 2.635221170348226e-05,
"loss": 1.3579,
"step": 21100
},
{
"epoch": 15.77,
"learning_rate": 2.6338371256159e-05,
"loss": 1.2726,
"step": 21110
},
{
"epoch": 15.78,
"learning_rate": 2.632453080883574e-05,
"loss": 1.2622,
"step": 21120
},
{
"epoch": 15.79,
"learning_rate": 2.6310690361512484e-05,
"loss": 1.3131,
"step": 21130
},
{
"epoch": 15.79,
"learning_rate": 2.6296849914189225e-05,
"loss": 1.3072,
"step": 21140
},
{
"epoch": 15.8,
"learning_rate": 2.628300946686597e-05,
"loss": 1.328,
"step": 21150
},
{
"epoch": 15.81,
"learning_rate": 2.626916901954271e-05,
"loss": 1.3179,
"step": 21160
},
{
"epoch": 15.82,
"learning_rate": 2.6255328572219456e-05,
"loss": 1.3063,
"step": 21170
},
{
"epoch": 15.82,
"learning_rate": 2.6241488124896197e-05,
"loss": 1.312,
"step": 21180
},
{
"epoch": 15.83,
"learning_rate": 2.6227647677572942e-05,
"loss": 1.3128,
"step": 21190
},
{
"epoch": 15.84,
"learning_rate": 2.6213807230249683e-05,
"loss": 1.2959,
"step": 21200
},
{
"epoch": 15.85,
"learning_rate": 2.6199966782926428e-05,
"loss": 1.3097,
"step": 21210
},
{
"epoch": 15.85,
"learning_rate": 2.6186126335603166e-05,
"loss": 1.2805,
"step": 21220
},
{
"epoch": 15.86,
"learning_rate": 2.6172285888279914e-05,
"loss": 1.2756,
"step": 21230
},
{
"epoch": 15.87,
"learning_rate": 2.615844544095665e-05,
"loss": 1.2996,
"step": 21240
},
{
"epoch": 15.88,
"learning_rate": 2.61446049936334e-05,
"loss": 1.2918,
"step": 21250
},
{
"epoch": 15.88,
"learning_rate": 2.6130764546310137e-05,
"loss": 1.3181,
"step": 21260
},
{
"epoch": 15.89,
"learning_rate": 2.611692409898688e-05,
"loss": 1.2927,
"step": 21270
},
{
"epoch": 15.9,
"learning_rate": 2.6103083651663623e-05,
"loss": 1.3071,
"step": 21280
},
{
"epoch": 15.91,
"learning_rate": 2.6089243204340365e-05,
"loss": 1.2731,
"step": 21290
},
{
"epoch": 15.91,
"learning_rate": 2.607540275701711e-05,
"loss": 1.318,
"step": 21300
},
{
"epoch": 15.92,
"learning_rate": 2.606156230969385e-05,
"loss": 1.3138,
"step": 21310
},
{
"epoch": 15.93,
"learning_rate": 2.6047721862370595e-05,
"loss": 1.297,
"step": 21320
},
{
"epoch": 15.94,
"learning_rate": 2.6033881415047333e-05,
"loss": 1.2375,
"step": 21330
},
{
"epoch": 15.94,
"learning_rate": 2.602004096772408e-05,
"loss": 1.2849,
"step": 21340
},
{
"epoch": 15.95,
"learning_rate": 2.600620052040082e-05,
"loss": 1.3101,
"step": 21350
},
{
"epoch": 15.96,
"learning_rate": 2.5992360073077564e-05,
"loss": 1.3048,
"step": 21360
},
{
"epoch": 15.97,
"learning_rate": 2.5978519625754305e-05,
"loss": 1.3144,
"step": 21370
},
{
"epoch": 15.97,
"learning_rate": 2.596467917843105e-05,
"loss": 1.2866,
"step": 21380
},
{
"epoch": 15.98,
"learning_rate": 2.595083873110779e-05,
"loss": 1.3081,
"step": 21390
},
{
"epoch": 15.99,
"learning_rate": 2.5936998283784532e-05,
"loss": 1.3333,
"step": 21400
},
{
"epoch": 16.0,
"learning_rate": 2.5923157836461277e-05,
"loss": 1.2835,
"step": 21410
},
{
"epoch": 16.0,
"eval_accuracy": 0.5738221545249226,
"eval_loss": 0.9885503053665161,
"eval_runtime": 265.3927,
"eval_samples_per_second": 71.739,
"eval_steps_per_second": 2.242,
"step": 21416
},
{
"epoch": 16.0,
"learning_rate": 2.5909317389138014e-05,
"loss": 1.2982,
"step": 21420
},
{
"epoch": 16.01,
"learning_rate": 2.5895476941814762e-05,
"loss": 1.3319,
"step": 21430
},
{
"epoch": 16.02,
"learning_rate": 2.58816364944915e-05,
"loss": 1.3062,
"step": 21440
},
{
"epoch": 16.03,
"learning_rate": 2.586779604716825e-05,
"loss": 1.2917,
"step": 21450
},
{
"epoch": 16.03,
"learning_rate": 2.5853955599844986e-05,
"loss": 1.2696,
"step": 21460
},
{
"epoch": 16.04,
"learning_rate": 2.584011515252173e-05,
"loss": 1.3222,
"step": 21470
},
{
"epoch": 16.05,
"learning_rate": 2.5826274705198472e-05,
"loss": 1.3079,
"step": 21480
},
{
"epoch": 16.06,
"learning_rate": 2.5812434257875217e-05,
"loss": 1.3055,
"step": 21490
},
{
"epoch": 16.06,
"learning_rate": 2.5798593810551958e-05,
"loss": 1.289,
"step": 21500
},
{
"epoch": 16.07,
"learning_rate": 2.5784753363228703e-05,
"loss": 1.2771,
"step": 21510
},
{
"epoch": 16.08,
"learning_rate": 2.5770912915905444e-05,
"loss": 1.3121,
"step": 21520
},
{
"epoch": 16.09,
"learning_rate": 2.575707246858219e-05,
"loss": 1.2913,
"step": 21530
},
{
"epoch": 16.09,
"learning_rate": 2.574323202125893e-05,
"loss": 1.3269,
"step": 21540
},
{
"epoch": 16.1,
"learning_rate": 2.5729391573935668e-05,
"loss": 1.2783,
"step": 21550
},
{
"epoch": 16.11,
"learning_rate": 2.5715551126612412e-05,
"loss": 1.2936,
"step": 21560
},
{
"epoch": 16.12,
"learning_rate": 2.5701710679289154e-05,
"loss": 1.2829,
"step": 21570
},
{
"epoch": 16.12,
"learning_rate": 2.5687870231965898e-05,
"loss": 1.3162,
"step": 21580
},
{
"epoch": 16.13,
"learning_rate": 2.567402978464264e-05,
"loss": 1.3454,
"step": 21590
},
{
"epoch": 16.14,
"learning_rate": 2.5660189337319384e-05,
"loss": 1.2886,
"step": 21600
},
{
"epoch": 16.14,
"learning_rate": 2.5646348889996125e-05,
"loss": 1.3045,
"step": 21610
},
{
"epoch": 16.15,
"learning_rate": 2.563250844267287e-05,
"loss": 1.2637,
"step": 21620
},
{
"epoch": 16.16,
"learning_rate": 2.561866799534961e-05,
"loss": 1.304,
"step": 21630
},
{
"epoch": 16.17,
"learning_rate": 2.5604827548026356e-05,
"loss": 1.3041,
"step": 21640
},
{
"epoch": 16.17,
"learning_rate": 2.5590987100703097e-05,
"loss": 1.3,
"step": 21650
},
{
"epoch": 16.18,
"learning_rate": 2.5577146653379842e-05,
"loss": 1.3098,
"step": 21660
},
{
"epoch": 16.19,
"learning_rate": 2.556330620605658e-05,
"loss": 1.3141,
"step": 21670
},
{
"epoch": 16.2,
"learning_rate": 2.554946575873332e-05,
"loss": 1.3343,
"step": 21680
},
{
"epoch": 16.2,
"learning_rate": 2.5535625311410066e-05,
"loss": 1.3019,
"step": 21690
},
{
"epoch": 16.21,
"learning_rate": 2.5521784864086807e-05,
"loss": 1.2556,
"step": 21700
},
{
"epoch": 16.22,
"learning_rate": 2.550794441676355e-05,
"loss": 1.2587,
"step": 21710
},
{
"epoch": 16.23,
"learning_rate": 2.5494103969440293e-05,
"loss": 1.3069,
"step": 21720
},
{
"epoch": 16.23,
"learning_rate": 2.5480263522117037e-05,
"loss": 1.3279,
"step": 21730
},
{
"epoch": 16.24,
"learning_rate": 2.546642307479378e-05,
"loss": 1.2748,
"step": 21740
},
{
"epoch": 16.25,
"learning_rate": 2.5452582627470523e-05,
"loss": 1.3123,
"step": 21750
},
{
"epoch": 16.26,
"learning_rate": 2.543874218014726e-05,
"loss": 1.299,
"step": 21760
},
{
"epoch": 16.26,
"learning_rate": 2.542490173282401e-05,
"loss": 1.2631,
"step": 21770
},
{
"epoch": 16.27,
"learning_rate": 2.5411061285500747e-05,
"loss": 1.2945,
"step": 21780
},
{
"epoch": 16.28,
"learning_rate": 2.5397220838177495e-05,
"loss": 1.2806,
"step": 21790
},
{
"epoch": 16.29,
"learning_rate": 2.5383380390854233e-05,
"loss": 1.3067,
"step": 21800
},
{
"epoch": 16.29,
"learning_rate": 2.5369539943530978e-05,
"loss": 1.3531,
"step": 21810
},
{
"epoch": 16.3,
"learning_rate": 2.535569949620772e-05,
"loss": 1.2994,
"step": 21820
},
{
"epoch": 16.31,
"learning_rate": 2.534185904888446e-05,
"loss": 1.2946,
"step": 21830
},
{
"epoch": 16.32,
"learning_rate": 2.5328018601561205e-05,
"loss": 1.3311,
"step": 21840
},
{
"epoch": 16.32,
"learning_rate": 2.5314178154237943e-05,
"loss": 1.3117,
"step": 21850
},
{
"epoch": 16.33,
"learning_rate": 2.530033770691469e-05,
"loss": 1.3302,
"step": 21860
},
{
"epoch": 16.34,
"learning_rate": 2.528649725959143e-05,
"loss": 1.2755,
"step": 21870
},
{
"epoch": 16.35,
"learning_rate": 2.5272656812268176e-05,
"loss": 1.3013,
"step": 21880
},
{
"epoch": 16.35,
"learning_rate": 2.5258816364944914e-05,
"loss": 1.3094,
"step": 21890
},
{
"epoch": 16.36,
"learning_rate": 2.524497591762166e-05,
"loss": 1.3286,
"step": 21900
},
{
"epoch": 16.37,
"learning_rate": 2.52311354702984e-05,
"loss": 1.3081,
"step": 21910
},
{
"epoch": 16.38,
"learning_rate": 2.5217295022975145e-05,
"loss": 1.3244,
"step": 21920
},
{
"epoch": 16.38,
"learning_rate": 2.5203454575651886e-05,
"loss": 1.3327,
"step": 21930
},
{
"epoch": 16.39,
"learning_rate": 2.518961412832863e-05,
"loss": 1.2688,
"step": 21940
},
{
"epoch": 16.4,
"learning_rate": 2.5175773681005372e-05,
"loss": 1.3005,
"step": 21950
},
{
"epoch": 16.41,
"learning_rate": 2.516193323368211e-05,
"loss": 1.2919,
"step": 21960
},
{
"epoch": 16.41,
"learning_rate": 2.5148092786358858e-05,
"loss": 1.3077,
"step": 21970
},
{
"epoch": 16.42,
"learning_rate": 2.5134252339035596e-05,
"loss": 1.272,
"step": 21980
},
{
"epoch": 16.43,
"learning_rate": 2.512041189171234e-05,
"loss": 1.33,
"step": 21990
},
{
"epoch": 16.44,
"learning_rate": 2.510657144438908e-05,
"loss": 1.3102,
"step": 22000
},
{
"epoch": 16.44,
"learning_rate": 2.5092730997065826e-05,
"loss": 1.3041,
"step": 22010
},
{
"epoch": 16.45,
"learning_rate": 2.5078890549742568e-05,
"loss": 1.295,
"step": 22020
},
{
"epoch": 16.46,
"learning_rate": 2.5065050102419312e-05,
"loss": 1.2923,
"step": 22030
},
{
"epoch": 16.47,
"learning_rate": 2.5051209655096053e-05,
"loss": 1.2995,
"step": 22040
},
{
"epoch": 16.47,
"learning_rate": 2.5037369207772798e-05,
"loss": 1.3025,
"step": 22050
},
{
"epoch": 16.48,
"learning_rate": 2.502352876044954e-05,
"loss": 1.2889,
"step": 22060
},
{
"epoch": 16.49,
"learning_rate": 2.5009688313126284e-05,
"loss": 1.2997,
"step": 22070
},
{
"epoch": 16.5,
"learning_rate": 2.4995847865803025e-05,
"loss": 1.2861,
"step": 22080
},
{
"epoch": 16.5,
"learning_rate": 2.4982007418479767e-05,
"loss": 1.2899,
"step": 22090
},
{
"epoch": 16.51,
"learning_rate": 2.4968166971156508e-05,
"loss": 1.3079,
"step": 22100
},
{
"epoch": 16.52,
"learning_rate": 2.4954326523833252e-05,
"loss": 1.2752,
"step": 22110
},
{
"epoch": 16.53,
"learning_rate": 2.4940486076509994e-05,
"loss": 1.2976,
"step": 22120
},
{
"epoch": 16.53,
"learning_rate": 2.492664562918674e-05,
"loss": 1.268,
"step": 22130
},
{
"epoch": 16.54,
"learning_rate": 2.491280518186348e-05,
"loss": 1.3401,
"step": 22140
},
{
"epoch": 16.55,
"learning_rate": 2.4898964734540224e-05,
"loss": 1.3147,
"step": 22150
},
{
"epoch": 16.56,
"learning_rate": 2.4885124287216965e-05,
"loss": 1.3154,
"step": 22160
},
{
"epoch": 16.56,
"learning_rate": 2.4871283839893707e-05,
"loss": 1.2636,
"step": 22170
},
{
"epoch": 16.57,
"learning_rate": 2.4857443392570448e-05,
"loss": 1.2477,
"step": 22180
},
{
"epoch": 16.58,
"learning_rate": 2.484360294524719e-05,
"loss": 1.2913,
"step": 22190
},
{
"epoch": 16.59,
"learning_rate": 2.4829762497923934e-05,
"loss": 1.2986,
"step": 22200
},
{
"epoch": 16.59,
"learning_rate": 2.4815922050600675e-05,
"loss": 1.3344,
"step": 22210
},
{
"epoch": 16.6,
"learning_rate": 2.480208160327742e-05,
"loss": 1.3279,
"step": 22220
},
{
"epoch": 16.61,
"learning_rate": 2.478824115595416e-05,
"loss": 1.2833,
"step": 22230
},
{
"epoch": 16.62,
"learning_rate": 2.4774400708630906e-05,
"loss": 1.324,
"step": 22240
},
{
"epoch": 16.62,
"learning_rate": 2.4760560261307647e-05,
"loss": 1.3112,
"step": 22250
},
{
"epoch": 16.63,
"learning_rate": 2.4746719813984388e-05,
"loss": 1.3066,
"step": 22260
},
{
"epoch": 16.64,
"learning_rate": 2.4732879366661133e-05,
"loss": 1.2677,
"step": 22270
},
{
"epoch": 16.65,
"learning_rate": 2.4719038919337874e-05,
"loss": 1.2793,
"step": 22280
},
{
"epoch": 16.65,
"learning_rate": 2.470519847201462e-05,
"loss": 1.3253,
"step": 22290
},
{
"epoch": 16.66,
"learning_rate": 2.4691358024691357e-05,
"loss": 1.2838,
"step": 22300
},
{
"epoch": 16.67,
"learning_rate": 2.46775175773681e-05,
"loss": 1.2741,
"step": 22310
},
{
"epoch": 16.68,
"learning_rate": 2.4663677130044842e-05,
"loss": 1.2894,
"step": 22320
},
{
"epoch": 16.68,
"learning_rate": 2.4649836682721587e-05,
"loss": 1.311,
"step": 22330
},
{
"epoch": 16.69,
"learning_rate": 2.463599623539833e-05,
"loss": 1.3027,
"step": 22340
},
{
"epoch": 16.7,
"learning_rate": 2.4622155788075073e-05,
"loss": 1.3238,
"step": 22350
},
{
"epoch": 16.71,
"learning_rate": 2.4608315340751814e-05,
"loss": 1.2994,
"step": 22360
},
{
"epoch": 16.71,
"learning_rate": 2.4594474893428556e-05,
"loss": 1.287,
"step": 22370
},
{
"epoch": 16.72,
"learning_rate": 2.45806344461053e-05,
"loss": 1.3126,
"step": 22380
},
{
"epoch": 16.73,
"learning_rate": 2.456679399878204e-05,
"loss": 1.3083,
"step": 22390
},
{
"epoch": 16.74,
"learning_rate": 2.4552953551458786e-05,
"loss": 1.3077,
"step": 22400
},
{
"epoch": 16.74,
"learning_rate": 2.4539113104135527e-05,
"loss": 1.2977,
"step": 22410
},
{
"epoch": 16.75,
"learning_rate": 2.4525272656812272e-05,
"loss": 1.3023,
"step": 22420
},
{
"epoch": 16.76,
"learning_rate": 2.4511432209489013e-05,
"loss": 1.2858,
"step": 22430
},
{
"epoch": 16.77,
"learning_rate": 2.4497591762165754e-05,
"loss": 1.3016,
"step": 22440
},
{
"epoch": 16.77,
"learning_rate": 2.4483751314842496e-05,
"loss": 1.3009,
"step": 22450
},
{
"epoch": 16.78,
"learning_rate": 2.4469910867519237e-05,
"loss": 1.2811,
"step": 22460
},
{
"epoch": 16.79,
"learning_rate": 2.445607042019598e-05,
"loss": 1.3532,
"step": 22470
},
{
"epoch": 16.79,
"learning_rate": 2.4442229972872723e-05,
"loss": 1.3439,
"step": 22480
},
{
"epoch": 16.8,
"learning_rate": 2.4428389525549468e-05,
"loss": 1.2958,
"step": 22490
},
{
"epoch": 16.81,
"learning_rate": 2.441454907822621e-05,
"loss": 1.2882,
"step": 22500
},
{
"epoch": 16.82,
"learning_rate": 2.4400708630902953e-05,
"loss": 1.2934,
"step": 22510
},
{
"epoch": 16.82,
"learning_rate": 2.4386868183579695e-05,
"loss": 1.2743,
"step": 22520
},
{
"epoch": 16.83,
"learning_rate": 2.4373027736256436e-05,
"loss": 1.2934,
"step": 22530
},
{
"epoch": 16.84,
"learning_rate": 2.435918728893318e-05,
"loss": 1.2922,
"step": 22540
},
{
"epoch": 16.85,
"learning_rate": 2.4345346841609922e-05,
"loss": 1.2883,
"step": 22550
},
{
"epoch": 16.85,
"learning_rate": 2.4331506394286666e-05,
"loss": 1.2833,
"step": 22560
},
{
"epoch": 16.86,
"learning_rate": 2.4317665946963408e-05,
"loss": 1.2967,
"step": 22570
},
{
"epoch": 16.87,
"learning_rate": 2.430382549964015e-05,
"loss": 1.3096,
"step": 22580
},
{
"epoch": 16.88,
"learning_rate": 2.428998505231689e-05,
"loss": 1.3162,
"step": 22590
},
{
"epoch": 16.88,
"learning_rate": 2.4276144604993635e-05,
"loss": 1.297,
"step": 22600
},
{
"epoch": 16.89,
"learning_rate": 2.4262304157670376e-05,
"loss": 1.32,
"step": 22610
},
{
"epoch": 16.9,
"learning_rate": 2.424846371034712e-05,
"loss": 1.2753,
"step": 22620
},
{
"epoch": 16.91,
"learning_rate": 2.4234623263023862e-05,
"loss": 1.3064,
"step": 22630
},
{
"epoch": 16.91,
"learning_rate": 2.4220782815700603e-05,
"loss": 1.2908,
"step": 22640
},
{
"epoch": 16.92,
"learning_rate": 2.4206942368377348e-05,
"loss": 1.3018,
"step": 22650
},
{
"epoch": 16.93,
"learning_rate": 2.419310192105409e-05,
"loss": 1.2931,
"step": 22660
},
{
"epoch": 16.94,
"learning_rate": 2.4179261473730834e-05,
"loss": 1.2936,
"step": 22670
},
{
"epoch": 16.94,
"learning_rate": 2.4165421026407575e-05,
"loss": 1.3265,
"step": 22680
},
{
"epoch": 16.95,
"learning_rate": 2.415158057908432e-05,
"loss": 1.253,
"step": 22690
},
{
"epoch": 16.96,
"learning_rate": 2.413774013176106e-05,
"loss": 1.2795,
"step": 22700
},
{
"epoch": 16.97,
"learning_rate": 2.4123899684437802e-05,
"loss": 1.2594,
"step": 22710
},
{
"epoch": 16.97,
"learning_rate": 2.4110059237114543e-05,
"loss": 1.2977,
"step": 22720
},
{
"epoch": 16.98,
"learning_rate": 2.4096218789791285e-05,
"loss": 1.3001,
"step": 22730
},
{
"epoch": 16.99,
"learning_rate": 2.408237834246803e-05,
"loss": 1.2431,
"step": 22740
},
{
"epoch": 17.0,
"learning_rate": 2.406853789514477e-05,
"loss": 1.3002,
"step": 22750
},
{
"epoch": 17.0,
"eval_accuracy": 0.5869005725090604,
"eval_loss": 0.9684858322143555,
"eval_runtime": 71.0719,
"eval_samples_per_second": 267.884,
"eval_steps_per_second": 8.372,
"step": 22754
},
{
"epoch": 17.0,
"learning_rate": 2.4054697447821515e-05,
"loss": 1.3065,
"step": 22760
},
{
"epoch": 17.01,
"learning_rate": 2.4040857000498256e-05,
"loss": 1.2703,
"step": 22770
},
{
"epoch": 17.02,
"learning_rate": 2.4027016553175e-05,
"loss": 1.3315,
"step": 22780
},
{
"epoch": 17.03,
"learning_rate": 2.4013176105851742e-05,
"loss": 1.2721,
"step": 22790
},
{
"epoch": 17.03,
"learning_rate": 2.3999335658528484e-05,
"loss": 1.3001,
"step": 22800
},
{
"epoch": 17.04,
"learning_rate": 2.3985495211205228e-05,
"loss": 1.2568,
"step": 22810
},
{
"epoch": 17.05,
"learning_rate": 2.397165476388197e-05,
"loss": 1.2594,
"step": 22820
},
{
"epoch": 17.06,
"learning_rate": 2.3957814316558714e-05,
"loss": 1.2695,
"step": 22830
},
{
"epoch": 17.06,
"learning_rate": 2.3943973869235455e-05,
"loss": 1.3251,
"step": 22840
},
{
"epoch": 17.07,
"learning_rate": 2.39301334219122e-05,
"loss": 1.2393,
"step": 22850
},
{
"epoch": 17.08,
"learning_rate": 2.3916292974588938e-05,
"loss": 1.2814,
"step": 22860
},
{
"epoch": 17.09,
"learning_rate": 2.3902452527265683e-05,
"loss": 1.2987,
"step": 22870
},
{
"epoch": 17.09,
"learning_rate": 2.3888612079942424e-05,
"loss": 1.272,
"step": 22880
},
{
"epoch": 17.1,
"learning_rate": 2.3874771632619165e-05,
"loss": 1.2803,
"step": 22890
},
{
"epoch": 17.11,
"learning_rate": 2.386093118529591e-05,
"loss": 1.3073,
"step": 22900
},
{
"epoch": 17.12,
"learning_rate": 2.384709073797265e-05,
"loss": 1.3303,
"step": 22910
},
{
"epoch": 17.12,
"learning_rate": 2.3833250290649396e-05,
"loss": 1.3112,
"step": 22920
},
{
"epoch": 17.13,
"learning_rate": 2.3819409843326137e-05,
"loss": 1.2875,
"step": 22930
},
{
"epoch": 17.14,
"learning_rate": 2.380556939600288e-05,
"loss": 1.2807,
"step": 22940
},
{
"epoch": 17.15,
"learning_rate": 2.3791728948679623e-05,
"loss": 1.2743,
"step": 22950
},
{
"epoch": 17.15,
"learning_rate": 2.3777888501356364e-05,
"loss": 1.2697,
"step": 22960
},
{
"epoch": 17.16,
"learning_rate": 2.376404805403311e-05,
"loss": 1.2847,
"step": 22970
},
{
"epoch": 17.17,
"learning_rate": 2.375020760670985e-05,
"loss": 1.3552,
"step": 22980
},
{
"epoch": 17.18,
"learning_rate": 2.3736367159386595e-05,
"loss": 1.3139,
"step": 22990
},
{
"epoch": 17.18,
"learning_rate": 2.3722526712063332e-05,
"loss": 1.317,
"step": 23000
},
{
"epoch": 17.19,
"learning_rate": 2.3708686264740077e-05,
"loss": 1.2612,
"step": 23010
},
{
"epoch": 17.2,
"learning_rate": 2.369484581741682e-05,
"loss": 1.2655,
"step": 23020
},
{
"epoch": 17.21,
"learning_rate": 2.3681005370093563e-05,
"loss": 1.308,
"step": 23030
},
{
"epoch": 17.21,
"learning_rate": 2.3667164922770304e-05,
"loss": 1.2999,
"step": 23040
},
{
"epoch": 17.22,
"learning_rate": 2.365332447544705e-05,
"loss": 1.2915,
"step": 23050
},
{
"epoch": 17.23,
"learning_rate": 2.363948402812379e-05,
"loss": 1.2773,
"step": 23060
},
{
"epoch": 17.24,
"learning_rate": 2.362564358080053e-05,
"loss": 1.2982,
"step": 23070
},
{
"epoch": 17.24,
"learning_rate": 2.3611803133477276e-05,
"loss": 1.2813,
"step": 23080
},
{
"epoch": 17.25,
"learning_rate": 2.3597962686154017e-05,
"loss": 1.3116,
"step": 23090
},
{
"epoch": 17.26,
"learning_rate": 2.3584122238830762e-05,
"loss": 1.2665,
"step": 23100
},
{
"epoch": 17.27,
"learning_rate": 2.3570281791507503e-05,
"loss": 1.2685,
"step": 23110
},
{
"epoch": 17.27,
"learning_rate": 2.3556441344184248e-05,
"loss": 1.334,
"step": 23120
},
{
"epoch": 17.28,
"learning_rate": 2.354260089686099e-05,
"loss": 1.2872,
"step": 23130
},
{
"epoch": 17.29,
"learning_rate": 2.352876044953773e-05,
"loss": 1.2599,
"step": 23140
},
{
"epoch": 17.3,
"learning_rate": 2.351492000221447e-05,
"loss": 1.3027,
"step": 23150
},
{
"epoch": 17.3,
"learning_rate": 2.3501079554891213e-05,
"loss": 1.2673,
"step": 23160
},
{
"epoch": 17.31,
"learning_rate": 2.3487239107567957e-05,
"loss": 1.2621,
"step": 23170
},
{
"epoch": 17.32,
"learning_rate": 2.34733986602447e-05,
"loss": 1.287,
"step": 23180
},
{
"epoch": 17.33,
"learning_rate": 2.3459558212921443e-05,
"loss": 1.2946,
"step": 23190
},
{
"epoch": 17.33,
"learning_rate": 2.3445717765598185e-05,
"loss": 1.2764,
"step": 23200
},
{
"epoch": 17.34,
"learning_rate": 2.343187731827493e-05,
"loss": 1.3174,
"step": 23210
},
{
"epoch": 17.35,
"learning_rate": 2.341803687095167e-05,
"loss": 1.2918,
"step": 23220
},
{
"epoch": 17.36,
"learning_rate": 2.3404196423628412e-05,
"loss": 1.2674,
"step": 23230
},
{
"epoch": 17.36,
"learning_rate": 2.3390355976305156e-05,
"loss": 1.3099,
"step": 23240
},
{
"epoch": 17.37,
"learning_rate": 2.3376515528981898e-05,
"loss": 1.3284,
"step": 23250
},
{
"epoch": 17.38,
"learning_rate": 2.3362675081658642e-05,
"loss": 1.2978,
"step": 23260
},
{
"epoch": 17.39,
"learning_rate": 2.3348834634335384e-05,
"loss": 1.2818,
"step": 23270
},
{
"epoch": 17.39,
"learning_rate": 2.3334994187012125e-05,
"loss": 1.2838,
"step": 23280
},
{
"epoch": 17.4,
"learning_rate": 2.3321153739688866e-05,
"loss": 1.285,
"step": 23290
},
{
"epoch": 17.41,
"learning_rate": 2.330731329236561e-05,
"loss": 1.2875,
"step": 23300
},
{
"epoch": 17.42,
"learning_rate": 2.3293472845042352e-05,
"loss": 1.2715,
"step": 23310
},
{
"epoch": 17.42,
"learning_rate": 2.3279632397719097e-05,
"loss": 1.2771,
"step": 23320
},
{
"epoch": 17.43,
"learning_rate": 2.3265791950395838e-05,
"loss": 1.3128,
"step": 23330
},
{
"epoch": 17.44,
"learning_rate": 2.325195150307258e-05,
"loss": 1.315,
"step": 23340
},
{
"epoch": 17.44,
"learning_rate": 2.3238111055749324e-05,
"loss": 1.3162,
"step": 23350
},
{
"epoch": 17.45,
"learning_rate": 2.3224270608426065e-05,
"loss": 1.2974,
"step": 23360
},
{
"epoch": 17.46,
"learning_rate": 2.321043016110281e-05,
"loss": 1.289,
"step": 23370
},
{
"epoch": 17.47,
"learning_rate": 2.319658971377955e-05,
"loss": 1.299,
"step": 23380
},
{
"epoch": 17.47,
"learning_rate": 2.3182749266456296e-05,
"loss": 1.328,
"step": 23390
},
{
"epoch": 17.48,
"learning_rate": 2.3168908819133037e-05,
"loss": 1.3246,
"step": 23400
},
{
"epoch": 17.49,
"learning_rate": 2.3155068371809778e-05,
"loss": 1.2638,
"step": 23410
},
{
"epoch": 17.5,
"learning_rate": 2.314122792448652e-05,
"loss": 1.2657,
"step": 23420
},
{
"epoch": 17.5,
"learning_rate": 2.312738747716326e-05,
"loss": 1.2579,
"step": 23430
},
{
"epoch": 17.51,
"learning_rate": 2.3113547029840005e-05,
"loss": 1.2863,
"step": 23440
},
{
"epoch": 17.52,
"learning_rate": 2.3099706582516746e-05,
"loss": 1.2969,
"step": 23450
},
{
"epoch": 17.53,
"learning_rate": 2.308586613519349e-05,
"loss": 1.3084,
"step": 23460
},
{
"epoch": 17.53,
"learning_rate": 2.3072025687870232e-05,
"loss": 1.3105,
"step": 23470
},
{
"epoch": 17.54,
"learning_rate": 2.3058185240546977e-05,
"loss": 1.3467,
"step": 23480
},
{
"epoch": 17.55,
"learning_rate": 2.3044344793223718e-05,
"loss": 1.3042,
"step": 23490
},
{
"epoch": 17.56,
"learning_rate": 2.303050434590046e-05,
"loss": 1.3073,
"step": 23500
},
{
"epoch": 17.56,
"learning_rate": 2.3016663898577204e-05,
"loss": 1.2887,
"step": 23510
},
{
"epoch": 17.57,
"learning_rate": 2.3002823451253945e-05,
"loss": 1.3019,
"step": 23520
},
{
"epoch": 17.58,
"learning_rate": 2.298898300393069e-05,
"loss": 1.2871,
"step": 23530
},
{
"epoch": 17.59,
"learning_rate": 2.297514255660743e-05,
"loss": 1.3348,
"step": 23540
},
{
"epoch": 17.59,
"learning_rate": 2.2961302109284173e-05,
"loss": 1.2796,
"step": 23550
},
{
"epoch": 17.6,
"learning_rate": 2.2947461661960914e-05,
"loss": 1.3037,
"step": 23560
},
{
"epoch": 17.61,
"learning_rate": 2.293362121463766e-05,
"loss": 1.3062,
"step": 23570
},
{
"epoch": 17.62,
"learning_rate": 2.29197807673144e-05,
"loss": 1.3066,
"step": 23580
},
{
"epoch": 17.62,
"learning_rate": 2.290594031999114e-05,
"loss": 1.3044,
"step": 23590
},
{
"epoch": 17.63,
"learning_rate": 2.2892099872667886e-05,
"loss": 1.3054,
"step": 23600
},
{
"epoch": 17.64,
"learning_rate": 2.2878259425344627e-05,
"loss": 1.279,
"step": 23610
},
{
"epoch": 17.65,
"learning_rate": 2.286441897802137e-05,
"loss": 1.3194,
"step": 23620
},
{
"epoch": 17.65,
"learning_rate": 2.2850578530698113e-05,
"loss": 1.3134,
"step": 23630
},
{
"epoch": 17.66,
"learning_rate": 2.2836738083374857e-05,
"loss": 1.3161,
"step": 23640
},
{
"epoch": 17.67,
"learning_rate": 2.28228976360516e-05,
"loss": 1.3159,
"step": 23650
},
{
"epoch": 17.68,
"learning_rate": 2.2809057188728343e-05,
"loss": 1.2916,
"step": 23660
},
{
"epoch": 17.68,
"learning_rate": 2.2795216741405085e-05,
"loss": 1.2873,
"step": 23670
},
{
"epoch": 17.69,
"learning_rate": 2.2781376294081826e-05,
"loss": 1.2494,
"step": 23680
},
{
"epoch": 17.7,
"learning_rate": 2.2767535846758567e-05,
"loss": 1.3089,
"step": 23690
},
{
"epoch": 17.71,
"learning_rate": 2.275369539943531e-05,
"loss": 1.2724,
"step": 23700
},
{
"epoch": 17.71,
"learning_rate": 2.2739854952112053e-05,
"loss": 1.2933,
"step": 23710
},
{
"epoch": 17.72,
"learning_rate": 2.2726014504788794e-05,
"loss": 1.2955,
"step": 23720
},
{
"epoch": 17.73,
"learning_rate": 2.271217405746554e-05,
"loss": 1.287,
"step": 23730
},
{
"epoch": 17.74,
"learning_rate": 2.269833361014228e-05,
"loss": 1.3319,
"step": 23740
},
{
"epoch": 17.74,
"learning_rate": 2.2684493162819025e-05,
"loss": 1.2791,
"step": 23750
},
{
"epoch": 17.75,
"learning_rate": 2.2670652715495766e-05,
"loss": 1.3351,
"step": 23760
},
{
"epoch": 17.76,
"learning_rate": 2.2656812268172507e-05,
"loss": 1.3318,
"step": 23770
},
{
"epoch": 17.77,
"learning_rate": 2.2642971820849252e-05,
"loss": 1.2721,
"step": 23780
},
{
"epoch": 17.77,
"learning_rate": 2.2629131373525993e-05,
"loss": 1.324,
"step": 23790
},
{
"epoch": 17.78,
"learning_rate": 2.2615290926202738e-05,
"loss": 1.2465,
"step": 23800
},
{
"epoch": 17.79,
"learning_rate": 2.260145047887948e-05,
"loss": 1.2849,
"step": 23810
},
{
"epoch": 17.8,
"learning_rate": 2.2587610031556224e-05,
"loss": 1.2689,
"step": 23820
},
{
"epoch": 17.8,
"learning_rate": 2.257376958423296e-05,
"loss": 1.3327,
"step": 23830
},
{
"epoch": 17.81,
"learning_rate": 2.2559929136909706e-05,
"loss": 1.2619,
"step": 23840
},
{
"epoch": 17.82,
"learning_rate": 2.2546088689586447e-05,
"loss": 1.2926,
"step": 23850
},
{
"epoch": 17.83,
"learning_rate": 2.253224824226319e-05,
"loss": 1.2681,
"step": 23860
},
{
"epoch": 17.83,
"learning_rate": 2.2518407794939933e-05,
"loss": 1.3282,
"step": 23870
},
{
"epoch": 17.84,
"learning_rate": 2.2504567347616675e-05,
"loss": 1.3387,
"step": 23880
},
{
"epoch": 17.85,
"learning_rate": 2.249072690029342e-05,
"loss": 1.3107,
"step": 23890
},
{
"epoch": 17.86,
"learning_rate": 2.247688645297016e-05,
"loss": 1.2781,
"step": 23900
},
{
"epoch": 17.86,
"learning_rate": 2.2463046005646905e-05,
"loss": 1.2771,
"step": 23910
},
{
"epoch": 17.87,
"learning_rate": 2.2449205558323646e-05,
"loss": 1.3214,
"step": 23920
},
{
"epoch": 17.88,
"learning_rate": 2.2435365111000388e-05,
"loss": 1.2929,
"step": 23930
},
{
"epoch": 17.89,
"learning_rate": 2.2421524663677132e-05,
"loss": 1.3048,
"step": 23940
},
{
"epoch": 17.89,
"learning_rate": 2.2407684216353874e-05,
"loss": 1.334,
"step": 23950
},
{
"epoch": 17.9,
"learning_rate": 2.2393843769030618e-05,
"loss": 1.3081,
"step": 23960
},
{
"epoch": 17.91,
"learning_rate": 2.2380003321707356e-05,
"loss": 1.3092,
"step": 23970
},
{
"epoch": 17.92,
"learning_rate": 2.23661628743841e-05,
"loss": 1.2924,
"step": 23980
},
{
"epoch": 17.92,
"learning_rate": 2.2352322427060842e-05,
"loss": 1.2965,
"step": 23990
},
{
"epoch": 17.93,
"learning_rate": 2.2338481979737587e-05,
"loss": 1.2917,
"step": 24000
},
{
"epoch": 17.94,
"learning_rate": 2.2324641532414328e-05,
"loss": 1.2985,
"step": 24010
},
{
"epoch": 17.95,
"learning_rate": 2.2310801085091072e-05,
"loss": 1.2689,
"step": 24020
},
{
"epoch": 17.95,
"learning_rate": 2.2296960637767814e-05,
"loss": 1.3423,
"step": 24030
},
{
"epoch": 17.96,
"learning_rate": 2.2283120190444555e-05,
"loss": 1.2806,
"step": 24040
},
{
"epoch": 17.97,
"learning_rate": 2.22692797431213e-05,
"loss": 1.3018,
"step": 24050
},
{
"epoch": 17.98,
"learning_rate": 2.225543929579804e-05,
"loss": 1.2862,
"step": 24060
},
{
"epoch": 17.98,
"learning_rate": 2.2241598848474786e-05,
"loss": 1.3226,
"step": 24070
},
{
"epoch": 17.99,
"learning_rate": 2.2227758401151527e-05,
"loss": 1.2764,
"step": 24080
},
{
"epoch": 18.0,
"learning_rate": 2.221391795382827e-05,
"loss": 1.289,
"step": 24090
},
{
"epoch": 18.0,
"eval_accuracy": 0.5940963285886863,
"eval_loss": 0.9519224762916565,
"eval_runtime": 71.0733,
"eval_samples_per_second": 267.878,
"eval_steps_per_second": 8.372,
"step": 24093
},
{
"epoch": 18.01,
"learning_rate": 2.2200077506505013e-05,
"loss": 1.2613,
"step": 24100
},
{
"epoch": 18.01,
"learning_rate": 2.2186237059181754e-05,
"loss": 1.2857,
"step": 24110
},
{
"epoch": 18.02,
"learning_rate": 2.2172396611858495e-05,
"loss": 1.2978,
"step": 24120
},
{
"epoch": 18.03,
"learning_rate": 2.2158556164535236e-05,
"loss": 1.3054,
"step": 24130
},
{
"epoch": 18.04,
"learning_rate": 2.214471571721198e-05,
"loss": 1.3058,
"step": 24140
},
{
"epoch": 18.04,
"learning_rate": 2.2130875269888722e-05,
"loss": 1.2863,
"step": 24150
},
{
"epoch": 18.05,
"learning_rate": 2.2117034822565467e-05,
"loss": 1.2381,
"step": 24160
},
{
"epoch": 18.06,
"learning_rate": 2.2103194375242208e-05,
"loss": 1.2866,
"step": 24170
},
{
"epoch": 18.06,
"learning_rate": 2.2089353927918953e-05,
"loss": 1.2598,
"step": 24180
},
{
"epoch": 18.07,
"learning_rate": 2.2075513480595694e-05,
"loss": 1.2974,
"step": 24190
},
{
"epoch": 18.08,
"learning_rate": 2.2061673033272435e-05,
"loss": 1.3053,
"step": 24200
},
{
"epoch": 18.09,
"learning_rate": 2.204783258594918e-05,
"loss": 1.279,
"step": 24210
},
{
"epoch": 18.09,
"learning_rate": 2.203399213862592e-05,
"loss": 1.2839,
"step": 24220
},
{
"epoch": 18.1,
"learning_rate": 2.2020151691302666e-05,
"loss": 1.2597,
"step": 24230
},
{
"epoch": 18.11,
"learning_rate": 2.2006311243979407e-05,
"loss": 1.2583,
"step": 24240
},
{
"epoch": 18.12,
"learning_rate": 2.199247079665615e-05,
"loss": 1.2785,
"step": 24250
},
{
"epoch": 18.12,
"learning_rate": 2.197863034933289e-05,
"loss": 1.3138,
"step": 24260
},
{
"epoch": 18.13,
"learning_rate": 2.1964789902009634e-05,
"loss": 1.3045,
"step": 24270
},
{
"epoch": 18.14,
"learning_rate": 2.1950949454686376e-05,
"loss": 1.2897,
"step": 24280
},
{
"epoch": 18.15,
"learning_rate": 2.193710900736312e-05,
"loss": 1.286,
"step": 24290
},
{
"epoch": 18.15,
"learning_rate": 2.192326856003986e-05,
"loss": 1.3211,
"step": 24300
},
{
"epoch": 18.16,
"learning_rate": 2.1909428112716603e-05,
"loss": 1.2831,
"step": 24310
},
{
"epoch": 18.17,
"learning_rate": 2.1895587665393347e-05,
"loss": 1.3021,
"step": 24320
},
{
"epoch": 18.18,
"learning_rate": 2.188174721807009e-05,
"loss": 1.2991,
"step": 24330
},
{
"epoch": 18.18,
"learning_rate": 2.1867906770746833e-05,
"loss": 1.2579,
"step": 24340
},
{
"epoch": 18.19,
"learning_rate": 2.1854066323423575e-05,
"loss": 1.2475,
"step": 24350
},
{
"epoch": 18.2,
"learning_rate": 2.184022587610032e-05,
"loss": 1.2686,
"step": 24360
},
{
"epoch": 18.21,
"learning_rate": 2.182638542877706e-05,
"loss": 1.2818,
"step": 24370
},
{
"epoch": 18.21,
"learning_rate": 2.18125449814538e-05,
"loss": 1.2929,
"step": 24380
},
{
"epoch": 18.22,
"learning_rate": 2.1798704534130543e-05,
"loss": 1.294,
"step": 24390
},
{
"epoch": 18.23,
"learning_rate": 2.1784864086807284e-05,
"loss": 1.3008,
"step": 24400
},
{
"epoch": 18.24,
"learning_rate": 2.177102363948403e-05,
"loss": 1.3038,
"step": 24410
},
{
"epoch": 18.24,
"learning_rate": 2.175718319216077e-05,
"loss": 1.2662,
"step": 24420
},
{
"epoch": 18.25,
"learning_rate": 2.1743342744837515e-05,
"loss": 1.2545,
"step": 24430
},
{
"epoch": 18.26,
"learning_rate": 2.1729502297514256e-05,
"loss": 1.322,
"step": 24440
},
{
"epoch": 18.27,
"learning_rate": 2.1715661850191e-05,
"loss": 1.2817,
"step": 24450
},
{
"epoch": 18.27,
"learning_rate": 2.1701821402867742e-05,
"loss": 1.3155,
"step": 24460
},
{
"epoch": 18.28,
"learning_rate": 2.1687980955544483e-05,
"loss": 1.3609,
"step": 24470
},
{
"epoch": 18.29,
"learning_rate": 2.1674140508221228e-05,
"loss": 1.2941,
"step": 24480
},
{
"epoch": 18.3,
"learning_rate": 2.166030006089797e-05,
"loss": 1.272,
"step": 24490
},
{
"epoch": 18.3,
"learning_rate": 2.1646459613574714e-05,
"loss": 1.2936,
"step": 24500
},
{
"epoch": 18.31,
"learning_rate": 2.1632619166251455e-05,
"loss": 1.3253,
"step": 24510
},
{
"epoch": 18.32,
"learning_rate": 2.16187787189282e-05,
"loss": 1.2619,
"step": 24520
},
{
"epoch": 18.33,
"learning_rate": 2.1604938271604937e-05,
"loss": 1.2756,
"step": 24530
},
{
"epoch": 18.33,
"learning_rate": 2.1591097824281682e-05,
"loss": 1.31,
"step": 24540
},
{
"epoch": 18.34,
"learning_rate": 2.1577257376958423e-05,
"loss": 1.3039,
"step": 24550
},
{
"epoch": 18.35,
"learning_rate": 2.1563416929635165e-05,
"loss": 1.3094,
"step": 24560
},
{
"epoch": 18.36,
"learning_rate": 2.154957648231191e-05,
"loss": 1.2967,
"step": 24570
},
{
"epoch": 18.36,
"learning_rate": 2.153573603498865e-05,
"loss": 1.2797,
"step": 24580
},
{
"epoch": 18.37,
"learning_rate": 2.1521895587665395e-05,
"loss": 1.3017,
"step": 24590
},
{
"epoch": 18.38,
"learning_rate": 2.1508055140342136e-05,
"loss": 1.2918,
"step": 24600
},
{
"epoch": 18.39,
"learning_rate": 2.149421469301888e-05,
"loss": 1.2844,
"step": 24610
},
{
"epoch": 18.39,
"learning_rate": 2.1480374245695622e-05,
"loss": 1.2658,
"step": 24620
},
{
"epoch": 18.4,
"learning_rate": 2.1466533798372367e-05,
"loss": 1.3162,
"step": 24630
},
{
"epoch": 18.41,
"learning_rate": 2.1452693351049108e-05,
"loss": 1.273,
"step": 24640
},
{
"epoch": 18.42,
"learning_rate": 2.143885290372585e-05,
"loss": 1.2806,
"step": 24650
},
{
"epoch": 18.42,
"learning_rate": 2.142501245640259e-05,
"loss": 1.316,
"step": 24660
},
{
"epoch": 18.43,
"learning_rate": 2.1411172009079332e-05,
"loss": 1.2932,
"step": 24670
},
{
"epoch": 18.44,
"learning_rate": 2.1397331561756077e-05,
"loss": 1.2543,
"step": 24680
},
{
"epoch": 18.45,
"learning_rate": 2.1383491114432818e-05,
"loss": 1.2845,
"step": 24690
},
{
"epoch": 18.45,
"learning_rate": 2.1369650667109562e-05,
"loss": 1.2565,
"step": 24700
},
{
"epoch": 18.46,
"learning_rate": 2.1355810219786304e-05,
"loss": 1.2838,
"step": 24710
},
{
"epoch": 18.47,
"learning_rate": 2.134196977246305e-05,
"loss": 1.3025,
"step": 24720
},
{
"epoch": 18.48,
"learning_rate": 2.132812932513979e-05,
"loss": 1.2438,
"step": 24730
},
{
"epoch": 18.48,
"learning_rate": 2.131428887781653e-05,
"loss": 1.2826,
"step": 24740
},
{
"epoch": 18.49,
"learning_rate": 2.1300448430493276e-05,
"loss": 1.298,
"step": 24750
},
{
"epoch": 18.5,
"learning_rate": 2.1286607983170017e-05,
"loss": 1.26,
"step": 24760
},
{
"epoch": 18.51,
"learning_rate": 2.127276753584676e-05,
"loss": 1.3425,
"step": 24770
},
{
"epoch": 18.51,
"learning_rate": 2.1258927088523503e-05,
"loss": 1.3084,
"step": 24780
},
{
"epoch": 18.52,
"learning_rate": 2.1245086641200247e-05,
"loss": 1.2981,
"step": 24790
},
{
"epoch": 18.53,
"learning_rate": 2.1231246193876985e-05,
"loss": 1.3173,
"step": 24800
},
{
"epoch": 18.54,
"learning_rate": 2.121740574655373e-05,
"loss": 1.2746,
"step": 24810
},
{
"epoch": 18.54,
"learning_rate": 2.120356529923047e-05,
"loss": 1.2851,
"step": 24820
},
{
"epoch": 18.55,
"learning_rate": 2.1189724851907212e-05,
"loss": 1.2735,
"step": 24830
},
{
"epoch": 18.56,
"learning_rate": 2.1175884404583957e-05,
"loss": 1.2628,
"step": 24840
},
{
"epoch": 18.57,
"learning_rate": 2.1162043957260698e-05,
"loss": 1.3399,
"step": 24850
},
{
"epoch": 18.57,
"learning_rate": 2.1148203509937443e-05,
"loss": 1.2839,
"step": 24860
},
{
"epoch": 18.58,
"learning_rate": 2.1134363062614184e-05,
"loss": 1.2567,
"step": 24870
},
{
"epoch": 18.59,
"learning_rate": 2.112052261529093e-05,
"loss": 1.2505,
"step": 24880
},
{
"epoch": 18.6,
"learning_rate": 2.110668216796767e-05,
"loss": 1.2847,
"step": 24890
},
{
"epoch": 18.6,
"learning_rate": 2.109284172064441e-05,
"loss": 1.2808,
"step": 24900
},
{
"epoch": 18.61,
"learning_rate": 2.1079001273321156e-05,
"loss": 1.2645,
"step": 24910
},
{
"epoch": 18.62,
"learning_rate": 2.1065160825997897e-05,
"loss": 1.2931,
"step": 24920
},
{
"epoch": 18.63,
"learning_rate": 2.1051320378674642e-05,
"loss": 1.264,
"step": 24930
},
{
"epoch": 18.63,
"learning_rate": 2.103747993135138e-05,
"loss": 1.272,
"step": 24940
},
{
"epoch": 18.64,
"learning_rate": 2.1023639484028124e-05,
"loss": 1.2644,
"step": 24950
},
{
"epoch": 18.65,
"learning_rate": 2.1009799036704866e-05,
"loss": 1.3275,
"step": 24960
},
{
"epoch": 18.66,
"learning_rate": 2.099595858938161e-05,
"loss": 1.3172,
"step": 24970
},
{
"epoch": 18.66,
"learning_rate": 2.098211814205835e-05,
"loss": 1.2425,
"step": 24980
},
{
"epoch": 18.67,
"learning_rate": 2.0968277694735096e-05,
"loss": 1.3038,
"step": 24990
},
{
"epoch": 18.68,
"learning_rate": 2.0954437247411837e-05,
"loss": 1.2943,
"step": 25000
},
{
"epoch": 18.69,
"learning_rate": 2.094059680008858e-05,
"loss": 1.2744,
"step": 25010
},
{
"epoch": 18.69,
"learning_rate": 2.0926756352765323e-05,
"loss": 1.2658,
"step": 25020
},
{
"epoch": 18.7,
"learning_rate": 2.0912915905442064e-05,
"loss": 1.3156,
"step": 25030
},
{
"epoch": 18.71,
"learning_rate": 2.089907545811881e-05,
"loss": 1.2951,
"step": 25040
},
{
"epoch": 18.71,
"learning_rate": 2.088523501079555e-05,
"loss": 1.3168,
"step": 25050
},
{
"epoch": 18.72,
"learning_rate": 2.0871394563472295e-05,
"loss": 1.3187,
"step": 25060
},
{
"epoch": 18.73,
"learning_rate": 2.0857554116149036e-05,
"loss": 1.3175,
"step": 25070
},
{
"epoch": 18.74,
"learning_rate": 2.0843713668825778e-05,
"loss": 1.2696,
"step": 25080
},
{
"epoch": 18.74,
"learning_rate": 2.082987322150252e-05,
"loss": 1.2969,
"step": 25090
},
{
"epoch": 18.75,
"learning_rate": 2.081603277417926e-05,
"loss": 1.2422,
"step": 25100
},
{
"epoch": 18.76,
"learning_rate": 2.0802192326856005e-05,
"loss": 1.2912,
"step": 25110
},
{
"epoch": 18.77,
"learning_rate": 2.0788351879532746e-05,
"loss": 1.2343,
"step": 25120
},
{
"epoch": 18.77,
"learning_rate": 2.077451143220949e-05,
"loss": 1.2553,
"step": 25130
},
{
"epoch": 18.78,
"learning_rate": 2.0760670984886232e-05,
"loss": 1.2849,
"step": 25140
},
{
"epoch": 18.79,
"learning_rate": 2.0746830537562976e-05,
"loss": 1.3126,
"step": 25150
},
{
"epoch": 18.8,
"learning_rate": 2.0732990090239718e-05,
"loss": 1.3019,
"step": 25160
},
{
"epoch": 18.8,
"learning_rate": 2.071914964291646e-05,
"loss": 1.2977,
"step": 25170
},
{
"epoch": 18.81,
"learning_rate": 2.0705309195593204e-05,
"loss": 1.2819,
"step": 25180
},
{
"epoch": 18.82,
"learning_rate": 2.0691468748269945e-05,
"loss": 1.276,
"step": 25190
},
{
"epoch": 18.83,
"learning_rate": 2.067762830094669e-05,
"loss": 1.2984,
"step": 25200
},
{
"epoch": 18.83,
"learning_rate": 2.066378785362343e-05,
"loss": 1.2692,
"step": 25210
},
{
"epoch": 18.84,
"learning_rate": 2.0649947406300172e-05,
"loss": 1.3263,
"step": 25220
},
{
"epoch": 18.85,
"learning_rate": 2.0636106958976913e-05,
"loss": 1.3372,
"step": 25230
},
{
"epoch": 18.86,
"learning_rate": 2.0622266511653658e-05,
"loss": 1.3128,
"step": 25240
},
{
"epoch": 18.86,
"learning_rate": 2.06084260643304e-05,
"loss": 1.2628,
"step": 25250
},
{
"epoch": 18.87,
"learning_rate": 2.0594585617007144e-05,
"loss": 1.2853,
"step": 25260
},
{
"epoch": 18.88,
"learning_rate": 2.0580745169683885e-05,
"loss": 1.3144,
"step": 25270
},
{
"epoch": 18.89,
"learning_rate": 2.0566904722360626e-05,
"loss": 1.274,
"step": 25280
},
{
"epoch": 18.89,
"learning_rate": 2.055306427503737e-05,
"loss": 1.2358,
"step": 25290
},
{
"epoch": 18.9,
"learning_rate": 2.0539223827714112e-05,
"loss": 1.2694,
"step": 25300
},
{
"epoch": 18.91,
"learning_rate": 2.0525383380390857e-05,
"loss": 1.2643,
"step": 25310
},
{
"epoch": 18.92,
"learning_rate": 2.0511542933067598e-05,
"loss": 1.2807,
"step": 25320
},
{
"epoch": 18.92,
"learning_rate": 2.0497702485744343e-05,
"loss": 1.2847,
"step": 25330
},
{
"epoch": 18.93,
"learning_rate": 2.0483862038421084e-05,
"loss": 1.2623,
"step": 25340
},
{
"epoch": 18.94,
"learning_rate": 2.0470021591097825e-05,
"loss": 1.2993,
"step": 25350
},
{
"epoch": 18.95,
"learning_rate": 2.0456181143774567e-05,
"loss": 1.2709,
"step": 25360
},
{
"epoch": 18.95,
"learning_rate": 2.0442340696451308e-05,
"loss": 1.3454,
"step": 25370
},
{
"epoch": 18.96,
"learning_rate": 2.0428500249128052e-05,
"loss": 1.2804,
"step": 25380
},
{
"epoch": 18.97,
"learning_rate": 2.0414659801804794e-05,
"loss": 1.2775,
"step": 25390
},
{
"epoch": 18.98,
"learning_rate": 2.040081935448154e-05,
"loss": 1.2589,
"step": 25400
},
{
"epoch": 18.98,
"learning_rate": 2.038697890715828e-05,
"loss": 1.2796,
"step": 25410
},
{
"epoch": 18.99,
"learning_rate": 2.0373138459835024e-05,
"loss": 1.2989,
"step": 25420
},
{
"epoch": 19.0,
"learning_rate": 2.0359298012511765e-05,
"loss": 1.3007,
"step": 25430
},
{
"epoch": 19.0,
"eval_accuracy": 0.5800199590314618,
"eval_loss": 0.9854642748832703,
"eval_runtime": 70.8134,
"eval_samples_per_second": 268.862,
"eval_steps_per_second": 8.402,
"step": 25431
},
{
"epoch": 19.01,
"learning_rate": 2.0345457565188507e-05,
"loss": 1.2591,
"step": 25440
},
{
"epoch": 19.01,
"learning_rate": 2.033161711786525e-05,
"loss": 1.2559,
"step": 25450
},
{
"epoch": 19.02,
"learning_rate": 2.0317776670541993e-05,
"loss": 1.2804,
"step": 25460
},
{
"epoch": 19.03,
"learning_rate": 2.0303936223218737e-05,
"loss": 1.2545,
"step": 25470
},
{
"epoch": 19.04,
"learning_rate": 2.029009577589548e-05,
"loss": 1.2771,
"step": 25480
},
{
"epoch": 19.04,
"learning_rate": 2.0276255328572223e-05,
"loss": 1.2753,
"step": 25490
},
{
"epoch": 19.05,
"learning_rate": 2.026241488124896e-05,
"loss": 1.2825,
"step": 25500
},
{
"epoch": 19.06,
"learning_rate": 2.0248574433925706e-05,
"loss": 1.2882,
"step": 25510
},
{
"epoch": 19.07,
"learning_rate": 2.0234733986602447e-05,
"loss": 1.2916,
"step": 25520
},
{
"epoch": 19.07,
"learning_rate": 2.0220893539279188e-05,
"loss": 1.2465,
"step": 25530
},
{
"epoch": 19.08,
"learning_rate": 2.0207053091955933e-05,
"loss": 1.2982,
"step": 25540
},
{
"epoch": 19.09,
"learning_rate": 2.0193212644632674e-05,
"loss": 1.3247,
"step": 25550
},
{
"epoch": 19.1,
"learning_rate": 2.017937219730942e-05,
"loss": 1.3155,
"step": 25560
},
{
"epoch": 19.1,
"learning_rate": 2.016553174998616e-05,
"loss": 1.3026,
"step": 25570
},
{
"epoch": 19.11,
"learning_rate": 2.0151691302662905e-05,
"loss": 1.3223,
"step": 25580
},
{
"epoch": 19.12,
"learning_rate": 2.0137850855339646e-05,
"loss": 1.2452,
"step": 25590
},
{
"epoch": 19.13,
"learning_rate": 2.0124010408016387e-05,
"loss": 1.2687,
"step": 25600
},
{
"epoch": 19.13,
"learning_rate": 2.0110169960693132e-05,
"loss": 1.2666,
"step": 25610
},
{
"epoch": 19.14,
"learning_rate": 2.0096329513369873e-05,
"loss": 1.3194,
"step": 25620
},
{
"epoch": 19.15,
"learning_rate": 2.0082489066046618e-05,
"loss": 1.3042,
"step": 25630
},
{
"epoch": 19.16,
"learning_rate": 2.0068648618723356e-05,
"loss": 1.2598,
"step": 25640
},
{
"epoch": 19.16,
"learning_rate": 2.00548081714001e-05,
"loss": 1.314,
"step": 25650
},
{
"epoch": 19.17,
"learning_rate": 2.004096772407684e-05,
"loss": 1.2644,
"step": 25660
},
{
"epoch": 19.18,
"learning_rate": 2.0027127276753586e-05,
"loss": 1.2982,
"step": 25670
},
{
"epoch": 19.19,
"learning_rate": 2.0013286829430327e-05,
"loss": 1.3051,
"step": 25680
},
{
"epoch": 19.19,
"learning_rate": 1.9999446382107072e-05,
"loss": 1.2479,
"step": 25690
},
{
"epoch": 19.2,
"learning_rate": 1.9985605934783813e-05,
"loss": 1.2951,
"step": 25700
},
{
"epoch": 19.21,
"learning_rate": 1.9971765487460554e-05,
"loss": 1.2911,
"step": 25710
},
{
"epoch": 19.22,
"learning_rate": 1.99579250401373e-05,
"loss": 1.2798,
"step": 25720
},
{
"epoch": 19.22,
"learning_rate": 1.994408459281404e-05,
"loss": 1.2628,
"step": 25730
},
{
"epoch": 19.23,
"learning_rate": 1.9930244145490785e-05,
"loss": 1.2728,
"step": 25740
},
{
"epoch": 19.24,
"learning_rate": 1.9916403698167526e-05,
"loss": 1.2748,
"step": 25750
},
{
"epoch": 19.25,
"learning_rate": 1.990256325084427e-05,
"loss": 1.3346,
"step": 25760
},
{
"epoch": 19.25,
"learning_rate": 1.9888722803521012e-05,
"loss": 1.3171,
"step": 25770
},
{
"epoch": 19.26,
"learning_rate": 1.9874882356197753e-05,
"loss": 1.3271,
"step": 25780
},
{
"epoch": 19.27,
"learning_rate": 1.9861041908874495e-05,
"loss": 1.2273,
"step": 25790
},
{
"epoch": 19.28,
"learning_rate": 1.9847201461551236e-05,
"loss": 1.2881,
"step": 25800
},
{
"epoch": 19.28,
"learning_rate": 1.983336101422798e-05,
"loss": 1.3163,
"step": 25810
},
{
"epoch": 19.29,
"learning_rate": 1.9819520566904722e-05,
"loss": 1.275,
"step": 25820
},
{
"epoch": 19.3,
"learning_rate": 1.9805680119581466e-05,
"loss": 1.2764,
"step": 25830
},
{
"epoch": 19.31,
"learning_rate": 1.9791839672258208e-05,
"loss": 1.2677,
"step": 25840
},
{
"epoch": 19.31,
"learning_rate": 1.9777999224934952e-05,
"loss": 1.2789,
"step": 25850
},
{
"epoch": 19.32,
"learning_rate": 1.9764158777611694e-05,
"loss": 1.3292,
"step": 25860
},
{
"epoch": 19.33,
"learning_rate": 1.9750318330288435e-05,
"loss": 1.2896,
"step": 25870
},
{
"epoch": 19.34,
"learning_rate": 1.973647788296518e-05,
"loss": 1.3036,
"step": 25880
},
{
"epoch": 19.34,
"learning_rate": 1.972263743564192e-05,
"loss": 1.2787,
"step": 25890
},
{
"epoch": 19.35,
"learning_rate": 1.9708796988318665e-05,
"loss": 1.3062,
"step": 25900
},
{
"epoch": 19.36,
"learning_rate": 1.9694956540995403e-05,
"loss": 1.287,
"step": 25910
},
{
"epoch": 19.36,
"learning_rate": 1.9681116093672148e-05,
"loss": 1.2937,
"step": 25920
},
{
"epoch": 19.37,
"learning_rate": 1.966727564634889e-05,
"loss": 1.2935,
"step": 25930
},
{
"epoch": 19.38,
"learning_rate": 1.9653435199025634e-05,
"loss": 1.2589,
"step": 25940
},
{
"epoch": 19.39,
"learning_rate": 1.9639594751702375e-05,
"loss": 1.2831,
"step": 25950
},
{
"epoch": 19.39,
"learning_rate": 1.962575430437912e-05,
"loss": 1.2805,
"step": 25960
},
{
"epoch": 19.4,
"learning_rate": 1.961191385705586e-05,
"loss": 1.243,
"step": 25970
},
{
"epoch": 19.41,
"learning_rate": 1.9598073409732602e-05,
"loss": 1.299,
"step": 25980
},
{
"epoch": 19.42,
"learning_rate": 1.9584232962409347e-05,
"loss": 1.252,
"step": 25990
},
{
"epoch": 19.42,
"learning_rate": 1.9570392515086088e-05,
"loss": 1.292,
"step": 26000
},
{
"epoch": 19.43,
"learning_rate": 1.9556552067762833e-05,
"loss": 1.2626,
"step": 26010
},
{
"epoch": 19.44,
"learning_rate": 1.9542711620439574e-05,
"loss": 1.3084,
"step": 26020
},
{
"epoch": 19.45,
"learning_rate": 1.952887117311632e-05,
"loss": 1.2673,
"step": 26030
},
{
"epoch": 19.45,
"learning_rate": 1.951503072579306e-05,
"loss": 1.2718,
"step": 26040
},
{
"epoch": 19.46,
"learning_rate": 1.95011902784698e-05,
"loss": 1.2836,
"step": 26050
},
{
"epoch": 19.47,
"learning_rate": 1.9487349831146542e-05,
"loss": 1.2772,
"step": 26060
},
{
"epoch": 19.48,
"learning_rate": 1.9473509383823284e-05,
"loss": 1.2704,
"step": 26070
},
{
"epoch": 19.48,
"learning_rate": 1.9459668936500028e-05,
"loss": 1.2845,
"step": 26080
},
{
"epoch": 19.49,
"learning_rate": 1.944582848917677e-05,
"loss": 1.251,
"step": 26090
},
{
"epoch": 19.5,
"learning_rate": 1.9431988041853514e-05,
"loss": 1.302,
"step": 26100
},
{
"epoch": 19.51,
"learning_rate": 1.9418147594530255e-05,
"loss": 1.2716,
"step": 26110
},
{
"epoch": 19.51,
"learning_rate": 1.9404307147207e-05,
"loss": 1.2642,
"step": 26120
},
{
"epoch": 19.52,
"learning_rate": 1.939046669988374e-05,
"loss": 1.3064,
"step": 26130
},
{
"epoch": 19.53,
"learning_rate": 1.9376626252560483e-05,
"loss": 1.2834,
"step": 26140
},
{
"epoch": 19.54,
"learning_rate": 1.9362785805237227e-05,
"loss": 1.2964,
"step": 26150
},
{
"epoch": 19.54,
"learning_rate": 1.934894535791397e-05,
"loss": 1.284,
"step": 26160
},
{
"epoch": 19.55,
"learning_rate": 1.9335104910590713e-05,
"loss": 1.3028,
"step": 26170
},
{
"epoch": 19.56,
"learning_rate": 1.9321264463267454e-05,
"loss": 1.303,
"step": 26180
},
{
"epoch": 19.57,
"learning_rate": 1.9307424015944196e-05,
"loss": 1.262,
"step": 26190
},
{
"epoch": 19.57,
"learning_rate": 1.9293583568620937e-05,
"loss": 1.2959,
"step": 26200
},
{
"epoch": 19.58,
"learning_rate": 1.927974312129768e-05,
"loss": 1.3257,
"step": 26210
},
{
"epoch": 19.59,
"learning_rate": 1.9265902673974423e-05,
"loss": 1.2838,
"step": 26220
},
{
"epoch": 19.6,
"learning_rate": 1.9252062226651167e-05,
"loss": 1.2753,
"step": 26230
},
{
"epoch": 19.6,
"learning_rate": 1.923822177932791e-05,
"loss": 1.2927,
"step": 26240
},
{
"epoch": 19.61,
"learning_rate": 1.922438133200465e-05,
"loss": 1.2392,
"step": 26250
},
{
"epoch": 19.62,
"learning_rate": 1.9210540884681395e-05,
"loss": 1.3028,
"step": 26260
},
{
"epoch": 19.63,
"learning_rate": 1.9196700437358136e-05,
"loss": 1.2982,
"step": 26270
},
{
"epoch": 19.63,
"learning_rate": 1.918285999003488e-05,
"loss": 1.2701,
"step": 26280
},
{
"epoch": 19.64,
"learning_rate": 1.9169019542711622e-05,
"loss": 1.2948,
"step": 26290
},
{
"epoch": 19.65,
"learning_rate": 1.9155179095388366e-05,
"loss": 1.2837,
"step": 26300
},
{
"epoch": 19.66,
"learning_rate": 1.9141338648065108e-05,
"loss": 1.2626,
"step": 26310
},
{
"epoch": 19.66,
"learning_rate": 1.912749820074185e-05,
"loss": 1.2802,
"step": 26320
},
{
"epoch": 19.67,
"learning_rate": 1.911365775341859e-05,
"loss": 1.2773,
"step": 26330
},
{
"epoch": 19.68,
"learning_rate": 1.909981730609533e-05,
"loss": 1.279,
"step": 26340
},
{
"epoch": 19.69,
"learning_rate": 1.9085976858772076e-05,
"loss": 1.2716,
"step": 26350
},
{
"epoch": 19.69,
"learning_rate": 1.9072136411448817e-05,
"loss": 1.2729,
"step": 26360
},
{
"epoch": 19.7,
"learning_rate": 1.9058295964125562e-05,
"loss": 1.2679,
"step": 26370
},
{
"epoch": 19.71,
"learning_rate": 1.9044455516802303e-05,
"loss": 1.3082,
"step": 26380
},
{
"epoch": 19.72,
"learning_rate": 1.9030615069479048e-05,
"loss": 1.2802,
"step": 26390
},
{
"epoch": 19.72,
"learning_rate": 1.901677462215579e-05,
"loss": 1.2668,
"step": 26400
},
{
"epoch": 19.73,
"learning_rate": 1.900293417483253e-05,
"loss": 1.279,
"step": 26410
},
{
"epoch": 19.74,
"learning_rate": 1.8989093727509275e-05,
"loss": 1.2712,
"step": 26420
},
{
"epoch": 19.75,
"learning_rate": 1.8975253280186016e-05,
"loss": 1.2735,
"step": 26430
},
{
"epoch": 19.75,
"learning_rate": 1.896141283286276e-05,
"loss": 1.2556,
"step": 26440
},
{
"epoch": 19.76,
"learning_rate": 1.8947572385539502e-05,
"loss": 1.2827,
"step": 26450
},
{
"epoch": 19.77,
"learning_rate": 1.8933731938216247e-05,
"loss": 1.2585,
"step": 26460
},
{
"epoch": 19.78,
"learning_rate": 1.8919891490892985e-05,
"loss": 1.2878,
"step": 26470
},
{
"epoch": 19.78,
"learning_rate": 1.890605104356973e-05,
"loss": 1.2727,
"step": 26480
},
{
"epoch": 19.79,
"learning_rate": 1.889221059624647e-05,
"loss": 1.2671,
"step": 26490
},
{
"epoch": 19.8,
"learning_rate": 1.8878370148923212e-05,
"loss": 1.2532,
"step": 26500
},
{
"epoch": 19.81,
"learning_rate": 1.8864529701599956e-05,
"loss": 1.2481,
"step": 26510
},
{
"epoch": 19.81,
"learning_rate": 1.8850689254276698e-05,
"loss": 1.3098,
"step": 26520
},
{
"epoch": 19.82,
"learning_rate": 1.8836848806953442e-05,
"loss": 1.2917,
"step": 26530
},
{
"epoch": 19.83,
"learning_rate": 1.8823008359630184e-05,
"loss": 1.2602,
"step": 26540
},
{
"epoch": 19.84,
"learning_rate": 1.8809167912306928e-05,
"loss": 1.2445,
"step": 26550
},
{
"epoch": 19.84,
"learning_rate": 1.879532746498367e-05,
"loss": 1.3125,
"step": 26560
},
{
"epoch": 19.85,
"learning_rate": 1.878148701766041e-05,
"loss": 1.3162,
"step": 26570
},
{
"epoch": 19.86,
"learning_rate": 1.8767646570337155e-05,
"loss": 1.2754,
"step": 26580
},
{
"epoch": 19.87,
"learning_rate": 1.8753806123013897e-05,
"loss": 1.2726,
"step": 26590
},
{
"epoch": 19.87,
"learning_rate": 1.873996567569064e-05,
"loss": 1.2695,
"step": 26600
},
{
"epoch": 19.88,
"learning_rate": 1.872612522836738e-05,
"loss": 1.2772,
"step": 26610
},
{
"epoch": 19.89,
"learning_rate": 1.8712284781044124e-05,
"loss": 1.289,
"step": 26620
},
{
"epoch": 19.9,
"learning_rate": 1.8698444333720865e-05,
"loss": 1.2998,
"step": 26630
},
{
"epoch": 19.9,
"learning_rate": 1.868460388639761e-05,
"loss": 1.2854,
"step": 26640
},
{
"epoch": 19.91,
"learning_rate": 1.867076343907435e-05,
"loss": 1.2884,
"step": 26650
},
{
"epoch": 19.92,
"learning_rate": 1.8656922991751096e-05,
"loss": 1.2635,
"step": 26660
},
{
"epoch": 19.93,
"learning_rate": 1.8643082544427837e-05,
"loss": 1.3038,
"step": 26670
},
{
"epoch": 19.93,
"learning_rate": 1.8629242097104578e-05,
"loss": 1.2943,
"step": 26680
},
{
"epoch": 19.94,
"learning_rate": 1.8615401649781323e-05,
"loss": 1.2811,
"step": 26690
},
{
"epoch": 19.95,
"learning_rate": 1.8601561202458064e-05,
"loss": 1.3272,
"step": 26700
},
{
"epoch": 19.96,
"learning_rate": 1.858772075513481e-05,
"loss": 1.3205,
"step": 26710
},
{
"epoch": 19.96,
"learning_rate": 1.857388030781155e-05,
"loss": 1.2723,
"step": 26720
},
{
"epoch": 19.97,
"learning_rate": 1.8560039860488295e-05,
"loss": 1.2695,
"step": 26730
},
{
"epoch": 19.98,
"learning_rate": 1.8546199413165036e-05,
"loss": 1.2539,
"step": 26740
},
{
"epoch": 19.99,
"learning_rate": 1.8532358965841777e-05,
"loss": 1.3049,
"step": 26750
},
{
"epoch": 19.99,
"learning_rate": 1.8518518518518518e-05,
"loss": 1.3084,
"step": 26760
},
{
"epoch": 20.0,
"learning_rate": 1.850467807119526e-05,
"loss": 1.2927,
"step": 26770
},
{
"epoch": 20.0,
"eval_accuracy": 0.5925206155785493,
"eval_loss": 0.949884295463562,
"eval_runtime": 69.7142,
"eval_samples_per_second": 273.101,
"eval_steps_per_second": 8.535,
"step": 26770
},
{
"epoch": 20.01,
"learning_rate": 1.8490837623872004e-05,
"loss": 1.2882,
"step": 26780
},
{
"epoch": 20.01,
"learning_rate": 1.8476997176548745e-05,
"loss": 1.3038,
"step": 26790
},
{
"epoch": 20.02,
"learning_rate": 1.846315672922549e-05,
"loss": 1.2585,
"step": 26800
},
{
"epoch": 20.03,
"learning_rate": 1.844931628190223e-05,
"loss": 1.2466,
"step": 26810
},
{
"epoch": 20.04,
"learning_rate": 1.8435475834578976e-05,
"loss": 1.2871,
"step": 26820
},
{
"epoch": 20.04,
"learning_rate": 1.8421635387255717e-05,
"loss": 1.2465,
"step": 26830
},
{
"epoch": 20.05,
"learning_rate": 1.840779493993246e-05,
"loss": 1.2872,
"step": 26840
},
{
"epoch": 20.06,
"learning_rate": 1.8393954492609203e-05,
"loss": 1.2679,
"step": 26850
},
{
"epoch": 20.07,
"learning_rate": 1.8380114045285944e-05,
"loss": 1.2818,
"step": 26860
},
{
"epoch": 20.07,
"learning_rate": 1.836627359796269e-05,
"loss": 1.2668,
"step": 26870
},
{
"epoch": 20.08,
"learning_rate": 1.835243315063943e-05,
"loss": 1.3143,
"step": 26880
},
{
"epoch": 20.09,
"learning_rate": 1.833859270331617e-05,
"loss": 1.297,
"step": 26890
},
{
"epoch": 20.1,
"learning_rate": 1.8324752255992913e-05,
"loss": 1.2566,
"step": 26900
},
{
"epoch": 20.1,
"learning_rate": 1.8310911808669657e-05,
"loss": 1.2377,
"step": 26910
},
{
"epoch": 20.11,
"learning_rate": 1.82970713613464e-05,
"loss": 1.2873,
"step": 26920
},
{
"epoch": 20.12,
"learning_rate": 1.8283230914023143e-05,
"loss": 1.3227,
"step": 26930
},
{
"epoch": 20.13,
"learning_rate": 1.8269390466699885e-05,
"loss": 1.2451,
"step": 26940
},
{
"epoch": 20.13,
"learning_rate": 1.8255550019376626e-05,
"loss": 1.2711,
"step": 26950
},
{
"epoch": 20.14,
"learning_rate": 1.824170957205337e-05,
"loss": 1.2773,
"step": 26960
},
{
"epoch": 20.15,
"learning_rate": 1.8227869124730112e-05,
"loss": 1.2749,
"step": 26970
},
{
"epoch": 20.16,
"learning_rate": 1.8214028677406856e-05,
"loss": 1.2906,
"step": 26980
},
{
"epoch": 20.16,
"learning_rate": 1.8200188230083598e-05,
"loss": 1.2638,
"step": 26990
},
{
"epoch": 20.17,
"learning_rate": 1.8186347782760342e-05,
"loss": 1.2724,
"step": 27000
},
{
"epoch": 20.18,
"learning_rate": 1.8172507335437084e-05,
"loss": 1.2943,
"step": 27010
},
{
"epoch": 20.19,
"learning_rate": 1.8158666888113825e-05,
"loss": 1.3112,
"step": 27020
},
{
"epoch": 20.19,
"learning_rate": 1.8144826440790566e-05,
"loss": 1.2323,
"step": 27030
},
{
"epoch": 20.2,
"learning_rate": 1.8130985993467307e-05,
"loss": 1.2776,
"step": 27040
},
{
"epoch": 20.21,
"learning_rate": 1.8117145546144052e-05,
"loss": 1.3116,
"step": 27050
},
{
"epoch": 20.22,
"learning_rate": 1.8103305098820793e-05,
"loss": 1.3118,
"step": 27060
},
{
"epoch": 20.22,
"learning_rate": 1.8089464651497538e-05,
"loss": 1.2827,
"step": 27070
},
{
"epoch": 20.23,
"learning_rate": 1.807562420417428e-05,
"loss": 1.2645,
"step": 27080
},
{
"epoch": 20.24,
"learning_rate": 1.8061783756851024e-05,
"loss": 1.275,
"step": 27090
},
{
"epoch": 20.25,
"learning_rate": 1.8047943309527765e-05,
"loss": 1.2766,
"step": 27100
},
{
"epoch": 20.25,
"learning_rate": 1.8034102862204506e-05,
"loss": 1.2865,
"step": 27110
},
{
"epoch": 20.26,
"learning_rate": 1.802026241488125e-05,
"loss": 1.2497,
"step": 27120
},
{
"epoch": 20.27,
"learning_rate": 1.8006421967557992e-05,
"loss": 1.2468,
"step": 27130
},
{
"epoch": 20.28,
"learning_rate": 1.7992581520234737e-05,
"loss": 1.2418,
"step": 27140
},
{
"epoch": 20.28,
"learning_rate": 1.7978741072911478e-05,
"loss": 1.3123,
"step": 27150
},
{
"epoch": 20.29,
"learning_rate": 1.796490062558822e-05,
"loss": 1.2588,
"step": 27160
},
{
"epoch": 20.3,
"learning_rate": 1.795106017826496e-05,
"loss": 1.2677,
"step": 27170
},
{
"epoch": 20.31,
"learning_rate": 1.7937219730941705e-05,
"loss": 1.2627,
"step": 27180
},
{
"epoch": 20.31,
"learning_rate": 1.7923379283618446e-05,
"loss": 1.2768,
"step": 27190
},
{
"epoch": 20.32,
"learning_rate": 1.790953883629519e-05,
"loss": 1.2855,
"step": 27200
},
{
"epoch": 20.33,
"learning_rate": 1.7895698388971932e-05,
"loss": 1.2418,
"step": 27210
},
{
"epoch": 20.34,
"learning_rate": 1.7881857941648674e-05,
"loss": 1.276,
"step": 27220
},
{
"epoch": 20.34,
"learning_rate": 1.7868017494325418e-05,
"loss": 1.265,
"step": 27230
},
{
"epoch": 20.35,
"learning_rate": 1.785417704700216e-05,
"loss": 1.2635,
"step": 27240
},
{
"epoch": 20.36,
"learning_rate": 1.7840336599678904e-05,
"loss": 1.3431,
"step": 27250
},
{
"epoch": 20.37,
"learning_rate": 1.7826496152355645e-05,
"loss": 1.2997,
"step": 27260
},
{
"epoch": 20.37,
"learning_rate": 1.781265570503239e-05,
"loss": 1.2714,
"step": 27270
},
{
"epoch": 20.38,
"learning_rate": 1.779881525770913e-05,
"loss": 1.2628,
"step": 27280
},
{
"epoch": 20.39,
"learning_rate": 1.7784974810385873e-05,
"loss": 1.3246,
"step": 27290
},
{
"epoch": 20.4,
"learning_rate": 1.7771134363062614e-05,
"loss": 1.3358,
"step": 27300
},
{
"epoch": 20.4,
"learning_rate": 1.7757293915739355e-05,
"loss": 1.3001,
"step": 27310
},
{
"epoch": 20.41,
"learning_rate": 1.77434534684161e-05,
"loss": 1.2963,
"step": 27320
},
{
"epoch": 20.42,
"learning_rate": 1.772961302109284e-05,
"loss": 1.2656,
"step": 27330
},
{
"epoch": 20.43,
"learning_rate": 1.7715772573769586e-05,
"loss": 1.2574,
"step": 27340
},
{
"epoch": 20.43,
"learning_rate": 1.7701932126446327e-05,
"loss": 1.2579,
"step": 27350
},
{
"epoch": 20.44,
"learning_rate": 1.768809167912307e-05,
"loss": 1.258,
"step": 27360
},
{
"epoch": 20.45,
"learning_rate": 1.7674251231799813e-05,
"loss": 1.3122,
"step": 27370
},
{
"epoch": 20.46,
"learning_rate": 1.7660410784476554e-05,
"loss": 1.2788,
"step": 27380
},
{
"epoch": 20.46,
"learning_rate": 1.76465703371533e-05,
"loss": 1.2659,
"step": 27390
},
{
"epoch": 20.47,
"learning_rate": 1.763272988983004e-05,
"loss": 1.3361,
"step": 27400
},
{
"epoch": 20.48,
"learning_rate": 1.7618889442506784e-05,
"loss": 1.2534,
"step": 27410
},
{
"epoch": 20.49,
"learning_rate": 1.7605048995183526e-05,
"loss": 1.2823,
"step": 27420
},
{
"epoch": 20.49,
"learning_rate": 1.759120854786027e-05,
"loss": 1.2438,
"step": 27430
},
{
"epoch": 20.5,
"learning_rate": 1.7577368100537008e-05,
"loss": 1.3101,
"step": 27440
},
{
"epoch": 20.51,
"learning_rate": 1.7563527653213753e-05,
"loss": 1.2095,
"step": 27450
},
{
"epoch": 20.52,
"learning_rate": 1.7549687205890494e-05,
"loss": 1.2439,
"step": 27460
},
{
"epoch": 20.52,
"learning_rate": 1.7535846758567235e-05,
"loss": 1.2787,
"step": 27470
},
{
"epoch": 20.53,
"learning_rate": 1.752200631124398e-05,
"loss": 1.2773,
"step": 27480
},
{
"epoch": 20.54,
"learning_rate": 1.750816586392072e-05,
"loss": 1.2723,
"step": 27490
},
{
"epoch": 20.55,
"learning_rate": 1.7494325416597466e-05,
"loss": 1.2788,
"step": 27500
},
{
"epoch": 20.55,
"learning_rate": 1.7480484969274207e-05,
"loss": 1.253,
"step": 27510
},
{
"epoch": 20.56,
"learning_rate": 1.7466644521950952e-05,
"loss": 1.2851,
"step": 27520
},
{
"epoch": 20.57,
"learning_rate": 1.7452804074627693e-05,
"loss": 1.2785,
"step": 27530
},
{
"epoch": 20.58,
"learning_rate": 1.7438963627304434e-05,
"loss": 1.2622,
"step": 27540
},
{
"epoch": 20.58,
"learning_rate": 1.742512317998118e-05,
"loss": 1.3016,
"step": 27550
},
{
"epoch": 20.59,
"learning_rate": 1.741128273265792e-05,
"loss": 1.3005,
"step": 27560
},
{
"epoch": 20.6,
"learning_rate": 1.7397442285334665e-05,
"loss": 1.2738,
"step": 27570
},
{
"epoch": 20.61,
"learning_rate": 1.7383601838011403e-05,
"loss": 1.3012,
"step": 27580
},
{
"epoch": 20.61,
"learning_rate": 1.7369761390688147e-05,
"loss": 1.2786,
"step": 27590
},
{
"epoch": 20.62,
"learning_rate": 1.735592094336489e-05,
"loss": 1.2719,
"step": 27600
},
{
"epoch": 20.63,
"learning_rate": 1.7342080496041633e-05,
"loss": 1.3044,
"step": 27610
},
{
"epoch": 20.64,
"learning_rate": 1.7328240048718375e-05,
"loss": 1.2824,
"step": 27620
},
{
"epoch": 20.64,
"learning_rate": 1.731439960139512e-05,
"loss": 1.2422,
"step": 27630
},
{
"epoch": 20.65,
"learning_rate": 1.730055915407186e-05,
"loss": 1.2498,
"step": 27640
},
{
"epoch": 20.66,
"learning_rate": 1.72867187067486e-05,
"loss": 1.2807,
"step": 27650
},
{
"epoch": 20.66,
"learning_rate": 1.7272878259425346e-05,
"loss": 1.253,
"step": 27660
},
{
"epoch": 20.67,
"learning_rate": 1.7259037812102088e-05,
"loss": 1.2534,
"step": 27670
},
{
"epoch": 20.68,
"learning_rate": 1.7245197364778832e-05,
"loss": 1.2705,
"step": 27680
},
{
"epoch": 20.69,
"learning_rate": 1.7231356917455573e-05,
"loss": 1.313,
"step": 27690
},
{
"epoch": 20.69,
"learning_rate": 1.7217516470132318e-05,
"loss": 1.2899,
"step": 27700
},
{
"epoch": 20.7,
"learning_rate": 1.720367602280906e-05,
"loss": 1.2733,
"step": 27710
},
{
"epoch": 20.71,
"learning_rate": 1.71898355754858e-05,
"loss": 1.2719,
"step": 27720
},
{
"epoch": 20.72,
"learning_rate": 1.7175995128162542e-05,
"loss": 1.2427,
"step": 27730
},
{
"epoch": 20.72,
"learning_rate": 1.7162154680839283e-05,
"loss": 1.2878,
"step": 27740
},
{
"epoch": 20.73,
"learning_rate": 1.7148314233516028e-05,
"loss": 1.3017,
"step": 27750
},
{
"epoch": 20.74,
"learning_rate": 1.713447378619277e-05,
"loss": 1.2841,
"step": 27760
},
{
"epoch": 20.75,
"learning_rate": 1.7120633338869514e-05,
"loss": 1.3078,
"step": 27770
},
{
"epoch": 20.75,
"learning_rate": 1.7106792891546255e-05,
"loss": 1.2823,
"step": 27780
},
{
"epoch": 20.76,
"learning_rate": 1.7092952444223e-05,
"loss": 1.2627,
"step": 27790
},
{
"epoch": 20.77,
"learning_rate": 1.707911199689974e-05,
"loss": 1.2338,
"step": 27800
},
{
"epoch": 20.78,
"learning_rate": 1.7065271549576482e-05,
"loss": 1.2355,
"step": 27810
},
{
"epoch": 20.78,
"learning_rate": 1.7051431102253227e-05,
"loss": 1.2774,
"step": 27820
},
{
"epoch": 20.79,
"learning_rate": 1.7037590654929968e-05,
"loss": 1.2406,
"step": 27830
},
{
"epoch": 20.8,
"learning_rate": 1.7023750207606713e-05,
"loss": 1.2693,
"step": 27840
},
{
"epoch": 20.81,
"learning_rate": 1.7009909760283454e-05,
"loss": 1.2699,
"step": 27850
},
{
"epoch": 20.81,
"learning_rate": 1.6996069312960195e-05,
"loss": 1.2963,
"step": 27860
},
{
"epoch": 20.82,
"learning_rate": 1.6982228865636936e-05,
"loss": 1.2709,
"step": 27870
},
{
"epoch": 20.83,
"learning_rate": 1.696838841831368e-05,
"loss": 1.3071,
"step": 27880
},
{
"epoch": 20.84,
"learning_rate": 1.6954547970990422e-05,
"loss": 1.3287,
"step": 27890
},
{
"epoch": 20.84,
"learning_rate": 1.6940707523667167e-05,
"loss": 1.2588,
"step": 27900
},
{
"epoch": 20.85,
"learning_rate": 1.6926867076343908e-05,
"loss": 1.3117,
"step": 27910
},
{
"epoch": 20.86,
"learning_rate": 1.691302662902065e-05,
"loss": 1.2707,
"step": 27920
},
{
"epoch": 20.87,
"learning_rate": 1.6899186181697394e-05,
"loss": 1.2208,
"step": 27930
},
{
"epoch": 20.87,
"learning_rate": 1.6885345734374135e-05,
"loss": 1.2397,
"step": 27940
},
{
"epoch": 20.88,
"learning_rate": 1.687150528705088e-05,
"loss": 1.3318,
"step": 27950
},
{
"epoch": 20.89,
"learning_rate": 1.685766483972762e-05,
"loss": 1.3184,
"step": 27960
},
{
"epoch": 20.9,
"learning_rate": 1.6843824392404366e-05,
"loss": 1.2822,
"step": 27970
},
{
"epoch": 20.9,
"learning_rate": 1.6829983945081107e-05,
"loss": 1.3117,
"step": 27980
},
{
"epoch": 20.91,
"learning_rate": 1.681614349775785e-05,
"loss": 1.2798,
"step": 27990
},
{
"epoch": 20.92,
"learning_rate": 1.680230305043459e-05,
"loss": 1.2829,
"step": 28000
},
{
"epoch": 20.93,
"learning_rate": 1.678846260311133e-05,
"loss": 1.2759,
"step": 28010
},
{
"epoch": 20.93,
"learning_rate": 1.6774622155788076e-05,
"loss": 1.2951,
"step": 28020
},
{
"epoch": 20.94,
"learning_rate": 1.6760781708464817e-05,
"loss": 1.2816,
"step": 28030
},
{
"epoch": 20.95,
"learning_rate": 1.674694126114156e-05,
"loss": 1.3016,
"step": 28040
},
{
"epoch": 20.96,
"learning_rate": 1.6733100813818303e-05,
"loss": 1.2376,
"step": 28050
},
{
"epoch": 20.96,
"learning_rate": 1.6719260366495047e-05,
"loss": 1.2757,
"step": 28060
},
{
"epoch": 20.97,
"learning_rate": 1.670541991917179e-05,
"loss": 1.2941,
"step": 28070
},
{
"epoch": 20.98,
"learning_rate": 1.669157947184853e-05,
"loss": 1.2866,
"step": 28080
},
{
"epoch": 20.99,
"learning_rate": 1.6677739024525274e-05,
"loss": 1.3083,
"step": 28090
},
{
"epoch": 20.99,
"learning_rate": 1.6663898577202016e-05,
"loss": 1.2985,
"step": 28100
},
{
"epoch": 21.0,
"eval_accuracy": 0.5854299070329324,
"eval_loss": 0.9668921828269958,
"eval_runtime": 72.7845,
"eval_samples_per_second": 261.58,
"eval_steps_per_second": 8.175,
"step": 28108
},
{
"epoch": 21.0,
"learning_rate": 1.665005812987876e-05,
"loss": 1.2521,
"step": 28110
},
{
"epoch": 21.01,
"learning_rate": 1.66362176825555e-05,
"loss": 1.2516,
"step": 28120
},
{
"epoch": 21.02,
"learning_rate": 1.6622377235232246e-05,
"loss": 1.2855,
"step": 28130
},
{
"epoch": 21.02,
"learning_rate": 1.6608536787908984e-05,
"loss": 1.2413,
"step": 28140
},
{
"epoch": 21.03,
"learning_rate": 1.659469634058573e-05,
"loss": 1.2043,
"step": 28150
},
{
"epoch": 21.04,
"learning_rate": 1.658085589326247e-05,
"loss": 1.2894,
"step": 28160
},
{
"epoch": 21.05,
"learning_rate": 1.656701544593921e-05,
"loss": 1.2416,
"step": 28170
},
{
"epoch": 21.05,
"learning_rate": 1.6553174998615956e-05,
"loss": 1.2665,
"step": 28180
},
{
"epoch": 21.06,
"learning_rate": 1.6539334551292697e-05,
"loss": 1.2837,
"step": 28190
},
{
"epoch": 21.07,
"learning_rate": 1.6525494103969442e-05,
"loss": 1.3306,
"step": 28200
},
{
"epoch": 21.08,
"learning_rate": 1.6511653656646183e-05,
"loss": 1.2277,
"step": 28210
},
{
"epoch": 21.08,
"learning_rate": 1.6497813209322928e-05,
"loss": 1.2478,
"step": 28220
},
{
"epoch": 21.09,
"learning_rate": 1.648397276199967e-05,
"loss": 1.2661,
"step": 28230
},
{
"epoch": 21.1,
"learning_rate": 1.6470132314676414e-05,
"loss": 1.2505,
"step": 28240
},
{
"epoch": 21.11,
"learning_rate": 1.6456291867353155e-05,
"loss": 1.2666,
"step": 28250
},
{
"epoch": 21.11,
"learning_rate": 1.6442451420029896e-05,
"loss": 1.3113,
"step": 28260
},
{
"epoch": 21.12,
"learning_rate": 1.6428610972706637e-05,
"loss": 1.2283,
"step": 28270
},
{
"epoch": 21.13,
"learning_rate": 1.641477052538338e-05,
"loss": 1.2641,
"step": 28280
},
{
"epoch": 21.14,
"learning_rate": 1.6400930078060123e-05,
"loss": 1.25,
"step": 28290
},
{
"epoch": 21.14,
"learning_rate": 1.6387089630736865e-05,
"loss": 1.2611,
"step": 28300
},
{
"epoch": 21.15,
"learning_rate": 1.637324918341361e-05,
"loss": 1.212,
"step": 28310
},
{
"epoch": 21.16,
"learning_rate": 1.635940873609035e-05,
"loss": 1.3028,
"step": 28320
},
{
"epoch": 21.17,
"learning_rate": 1.6345568288767095e-05,
"loss": 1.3137,
"step": 28330
},
{
"epoch": 21.17,
"learning_rate": 1.6331727841443836e-05,
"loss": 1.2567,
"step": 28340
},
{
"epoch": 21.18,
"learning_rate": 1.6317887394120578e-05,
"loss": 1.2859,
"step": 28350
},
{
"epoch": 21.19,
"learning_rate": 1.6304046946797322e-05,
"loss": 1.2737,
"step": 28360
},
{
"epoch": 21.2,
"learning_rate": 1.6290206499474063e-05,
"loss": 1.2937,
"step": 28370
},
{
"epoch": 21.2,
"learning_rate": 1.6276366052150808e-05,
"loss": 1.3292,
"step": 28380
},
{
"epoch": 21.21,
"learning_rate": 1.626252560482755e-05,
"loss": 1.2635,
"step": 28390
},
{
"epoch": 21.22,
"learning_rate": 1.6248685157504294e-05,
"loss": 1.3097,
"step": 28400
},
{
"epoch": 21.23,
"learning_rate": 1.6234844710181032e-05,
"loss": 1.2691,
"step": 28410
},
{
"epoch": 21.23,
"learning_rate": 1.6221004262857777e-05,
"loss": 1.2705,
"step": 28420
},
{
"epoch": 21.24,
"learning_rate": 1.6207163815534518e-05,
"loss": 1.2883,
"step": 28430
},
{
"epoch": 21.25,
"learning_rate": 1.619332336821126e-05,
"loss": 1.2412,
"step": 28440
},
{
"epoch": 21.26,
"learning_rate": 1.6179482920888004e-05,
"loss": 1.2863,
"step": 28450
},
{
"epoch": 21.26,
"learning_rate": 1.6165642473564745e-05,
"loss": 1.2381,
"step": 28460
},
{
"epoch": 21.27,
"learning_rate": 1.615180202624149e-05,
"loss": 1.2604,
"step": 28470
},
{
"epoch": 21.28,
"learning_rate": 1.613796157891823e-05,
"loss": 1.265,
"step": 28480
},
{
"epoch": 21.29,
"learning_rate": 1.6124121131594975e-05,
"loss": 1.2845,
"step": 28490
},
{
"epoch": 21.29,
"learning_rate": 1.6110280684271717e-05,
"loss": 1.2823,
"step": 28500
},
{
"epoch": 21.3,
"learning_rate": 1.6096440236948458e-05,
"loss": 1.2745,
"step": 28510
},
{
"epoch": 21.31,
"learning_rate": 1.6082599789625203e-05,
"loss": 1.2861,
"step": 28520
},
{
"epoch": 21.31,
"learning_rate": 1.6068759342301944e-05,
"loss": 1.2449,
"step": 28530
},
{
"epoch": 21.32,
"learning_rate": 1.605491889497869e-05,
"loss": 1.2503,
"step": 28540
},
{
"epoch": 21.33,
"learning_rate": 1.6041078447655426e-05,
"loss": 1.2628,
"step": 28550
},
{
"epoch": 21.34,
"learning_rate": 1.602723800033217e-05,
"loss": 1.2749,
"step": 28560
},
{
"epoch": 21.34,
"learning_rate": 1.6013397553008912e-05,
"loss": 1.2519,
"step": 28570
},
{
"epoch": 21.35,
"learning_rate": 1.5999557105685657e-05,
"loss": 1.3134,
"step": 28580
},
{
"epoch": 21.36,
"learning_rate": 1.5985716658362398e-05,
"loss": 1.27,
"step": 28590
},
{
"epoch": 21.37,
"learning_rate": 1.5971876211039143e-05,
"loss": 1.3088,
"step": 28600
},
{
"epoch": 21.37,
"learning_rate": 1.5958035763715884e-05,
"loss": 1.2932,
"step": 28610
},
{
"epoch": 21.38,
"learning_rate": 1.5944195316392625e-05,
"loss": 1.26,
"step": 28620
},
{
"epoch": 21.39,
"learning_rate": 1.593035486906937e-05,
"loss": 1.2789,
"step": 28630
},
{
"epoch": 21.4,
"learning_rate": 1.591651442174611e-05,
"loss": 1.2676,
"step": 28640
},
{
"epoch": 21.4,
"learning_rate": 1.5902673974422856e-05,
"loss": 1.2845,
"step": 28650
},
{
"epoch": 21.41,
"learning_rate": 1.5888833527099597e-05,
"loss": 1.2694,
"step": 28660
},
{
"epoch": 21.42,
"learning_rate": 1.5874993079776342e-05,
"loss": 1.2814,
"step": 28670
},
{
"epoch": 21.43,
"learning_rate": 1.5861152632453083e-05,
"loss": 1.2573,
"step": 28680
},
{
"epoch": 21.43,
"learning_rate": 1.5847312185129824e-05,
"loss": 1.2944,
"step": 28690
},
{
"epoch": 21.44,
"learning_rate": 1.5833471737806565e-05,
"loss": 1.2694,
"step": 28700
},
{
"epoch": 21.45,
"learning_rate": 1.5819631290483307e-05,
"loss": 1.2739,
"step": 28710
},
{
"epoch": 21.46,
"learning_rate": 1.580579084316005e-05,
"loss": 1.2513,
"step": 28720
},
{
"epoch": 21.46,
"learning_rate": 1.5791950395836793e-05,
"loss": 1.2625,
"step": 28730
},
{
"epoch": 21.47,
"learning_rate": 1.5778109948513537e-05,
"loss": 1.2757,
"step": 28740
},
{
"epoch": 21.48,
"learning_rate": 1.576426950119028e-05,
"loss": 1.2574,
"step": 28750
},
{
"epoch": 21.49,
"learning_rate": 1.5750429053867023e-05,
"loss": 1.2995,
"step": 28760
},
{
"epoch": 21.49,
"learning_rate": 1.5736588606543764e-05,
"loss": 1.3044,
"step": 28770
},
{
"epoch": 21.5,
"learning_rate": 1.5722748159220506e-05,
"loss": 1.2873,
"step": 28780
},
{
"epoch": 21.51,
"learning_rate": 1.570890771189725e-05,
"loss": 1.2338,
"step": 28790
},
{
"epoch": 21.52,
"learning_rate": 1.569506726457399e-05,
"loss": 1.241,
"step": 28800
},
{
"epoch": 21.52,
"learning_rate": 1.5681226817250736e-05,
"loss": 1.2958,
"step": 28810
},
{
"epoch": 21.53,
"learning_rate": 1.5667386369927477e-05,
"loss": 1.2799,
"step": 28820
},
{
"epoch": 21.54,
"learning_rate": 1.565354592260422e-05,
"loss": 1.2474,
"step": 28830
},
{
"epoch": 21.55,
"learning_rate": 1.563970547528096e-05,
"loss": 1.2732,
"step": 28840
},
{
"epoch": 21.55,
"learning_rate": 1.5625865027957705e-05,
"loss": 1.2778,
"step": 28850
},
{
"epoch": 21.56,
"learning_rate": 1.5612024580634446e-05,
"loss": 1.3026,
"step": 28860
},
{
"epoch": 21.57,
"learning_rate": 1.559818413331119e-05,
"loss": 1.2845,
"step": 28870
},
{
"epoch": 21.58,
"learning_rate": 1.5584343685987932e-05,
"loss": 1.2627,
"step": 28880
},
{
"epoch": 21.58,
"learning_rate": 1.5570503238664673e-05,
"loss": 1.2342,
"step": 28890
},
{
"epoch": 21.59,
"learning_rate": 1.5556662791341418e-05,
"loss": 1.2705,
"step": 28900
},
{
"epoch": 21.6,
"learning_rate": 1.554282234401816e-05,
"loss": 1.2922,
"step": 28910
},
{
"epoch": 21.61,
"learning_rate": 1.5528981896694904e-05,
"loss": 1.2736,
"step": 28920
},
{
"epoch": 21.61,
"learning_rate": 1.5515141449371645e-05,
"loss": 1.3096,
"step": 28930
},
{
"epoch": 21.62,
"learning_rate": 1.550130100204839e-05,
"loss": 1.2725,
"step": 28940
},
{
"epoch": 21.63,
"learning_rate": 1.548746055472513e-05,
"loss": 1.2719,
"step": 28950
},
{
"epoch": 21.64,
"learning_rate": 1.5473620107401872e-05,
"loss": 1.2649,
"step": 28960
},
{
"epoch": 21.64,
"learning_rate": 1.5459779660078613e-05,
"loss": 1.2727,
"step": 28970
},
{
"epoch": 21.65,
"learning_rate": 1.5445939212755354e-05,
"loss": 1.2695,
"step": 28980
},
{
"epoch": 21.66,
"learning_rate": 1.54320987654321e-05,
"loss": 1.2836,
"step": 28990
},
{
"epoch": 21.67,
"learning_rate": 1.541825831810884e-05,
"loss": 1.2365,
"step": 29000
},
{
"epoch": 21.67,
"learning_rate": 1.5404417870785585e-05,
"loss": 1.2548,
"step": 29010
},
{
"epoch": 21.68,
"learning_rate": 1.5390577423462326e-05,
"loss": 1.3083,
"step": 29020
},
{
"epoch": 21.69,
"learning_rate": 1.537673697613907e-05,
"loss": 1.2701,
"step": 29030
},
{
"epoch": 21.7,
"learning_rate": 1.5362896528815812e-05,
"loss": 1.2884,
"step": 29040
},
{
"epoch": 21.7,
"learning_rate": 1.5349056081492553e-05,
"loss": 1.2789,
"step": 29050
},
{
"epoch": 21.71,
"learning_rate": 1.5335215634169298e-05,
"loss": 1.2309,
"step": 29060
},
{
"epoch": 21.72,
"learning_rate": 1.532137518684604e-05,
"loss": 1.2414,
"step": 29070
},
{
"epoch": 21.73,
"learning_rate": 1.5307534739522784e-05,
"loss": 1.2592,
"step": 29080
},
{
"epoch": 21.73,
"learning_rate": 1.5293694292199525e-05,
"loss": 1.2567,
"step": 29090
},
{
"epoch": 21.74,
"learning_rate": 1.527985384487627e-05,
"loss": 1.2564,
"step": 29100
},
{
"epoch": 21.75,
"learning_rate": 1.5266013397553008e-05,
"loss": 1.2578,
"step": 29110
},
{
"epoch": 21.76,
"learning_rate": 1.525217295022975e-05,
"loss": 1.2218,
"step": 29120
},
{
"epoch": 21.76,
"learning_rate": 1.5238332502906494e-05,
"loss": 1.2795,
"step": 29130
},
{
"epoch": 21.77,
"learning_rate": 1.5224492055583237e-05,
"loss": 1.2763,
"step": 29140
},
{
"epoch": 21.78,
"learning_rate": 1.521065160825998e-05,
"loss": 1.2665,
"step": 29150
},
{
"epoch": 21.79,
"learning_rate": 1.5196811160936722e-05,
"loss": 1.2681,
"step": 29160
},
{
"epoch": 21.79,
"learning_rate": 1.5182970713613465e-05,
"loss": 1.2926,
"step": 29170
},
{
"epoch": 21.8,
"learning_rate": 1.5169130266290207e-05,
"loss": 1.317,
"step": 29180
},
{
"epoch": 21.81,
"learning_rate": 1.515528981896695e-05,
"loss": 1.2455,
"step": 29190
},
{
"epoch": 21.82,
"learning_rate": 1.5141449371643693e-05,
"loss": 1.2605,
"step": 29200
},
{
"epoch": 21.82,
"learning_rate": 1.5127608924320436e-05,
"loss": 1.2793,
"step": 29210
},
{
"epoch": 21.83,
"learning_rate": 1.5113768476997178e-05,
"loss": 1.2823,
"step": 29220
},
{
"epoch": 21.84,
"learning_rate": 1.5099928029673921e-05,
"loss": 1.2504,
"step": 29230
},
{
"epoch": 21.85,
"learning_rate": 1.5086087582350664e-05,
"loss": 1.2982,
"step": 29240
},
{
"epoch": 21.85,
"learning_rate": 1.5072247135027404e-05,
"loss": 1.2854,
"step": 29250
},
{
"epoch": 21.86,
"learning_rate": 1.5058406687704147e-05,
"loss": 1.275,
"step": 29260
},
{
"epoch": 21.87,
"learning_rate": 1.504456624038089e-05,
"loss": 1.2497,
"step": 29270
},
{
"epoch": 21.88,
"learning_rate": 1.5030725793057631e-05,
"loss": 1.2596,
"step": 29280
},
{
"epoch": 21.88,
"learning_rate": 1.5016885345734374e-05,
"loss": 1.2798,
"step": 29290
},
{
"epoch": 21.89,
"learning_rate": 1.5003044898411117e-05,
"loss": 1.2784,
"step": 29300
},
{
"epoch": 21.9,
"learning_rate": 1.498920445108786e-05,
"loss": 1.2899,
"step": 29310
},
{
"epoch": 21.91,
"learning_rate": 1.4975364003764603e-05,
"loss": 1.2837,
"step": 29320
},
{
"epoch": 21.91,
"learning_rate": 1.4961523556441346e-05,
"loss": 1.254,
"step": 29330
},
{
"epoch": 21.92,
"learning_rate": 1.4947683109118089e-05,
"loss": 1.2782,
"step": 29340
},
{
"epoch": 21.93,
"learning_rate": 1.493384266179483e-05,
"loss": 1.3035,
"step": 29350
},
{
"epoch": 21.94,
"learning_rate": 1.4920002214471573e-05,
"loss": 1.2693,
"step": 29360
},
{
"epoch": 21.94,
"learning_rate": 1.4906161767148316e-05,
"loss": 1.2639,
"step": 29370
},
{
"epoch": 21.95,
"learning_rate": 1.4892321319825055e-05,
"loss": 1.2399,
"step": 29380
},
{
"epoch": 21.96,
"learning_rate": 1.4878480872501798e-05,
"loss": 1.1976,
"step": 29390
},
{
"epoch": 21.96,
"learning_rate": 1.4864640425178541e-05,
"loss": 1.2847,
"step": 29400
},
{
"epoch": 21.97,
"learning_rate": 1.4850799977855284e-05,
"loss": 1.2955,
"step": 29410
},
{
"epoch": 21.98,
"learning_rate": 1.4836959530532027e-05,
"loss": 1.2389,
"step": 29420
},
{
"epoch": 21.99,
"learning_rate": 1.482311908320877e-05,
"loss": 1.2782,
"step": 29430
},
{
"epoch": 21.99,
"learning_rate": 1.4809278635885513e-05,
"loss": 1.2957,
"step": 29440
},
{
"epoch": 22.0,
"eval_accuracy": 0.5903146173643574,
"eval_loss": 0.9550768733024597,
"eval_runtime": 73.6398,
"eval_samples_per_second": 258.542,
"eval_steps_per_second": 8.08,
"step": 29447
},
{
"epoch": 22.0,
"learning_rate": 1.4795438188562254e-05,
"loss": 1.289,
"step": 29450
},
{
"epoch": 22.01,
"learning_rate": 1.4781597741238997e-05,
"loss": 1.2822,
"step": 29460
},
{
"epoch": 22.02,
"learning_rate": 1.476775729391574e-05,
"loss": 1.2472,
"step": 29470
},
{
"epoch": 22.02,
"learning_rate": 1.4753916846592483e-05,
"loss": 1.2854,
"step": 29480
},
{
"epoch": 22.03,
"learning_rate": 1.4740076399269226e-05,
"loss": 1.2809,
"step": 29490
},
{
"epoch": 22.04,
"learning_rate": 1.4726235951945969e-05,
"loss": 1.2369,
"step": 29500
},
{
"epoch": 22.05,
"learning_rate": 1.4712395504622712e-05,
"loss": 1.2761,
"step": 29510
},
{
"epoch": 22.05,
"learning_rate": 1.4698555057299452e-05,
"loss": 1.2432,
"step": 29520
},
{
"epoch": 22.06,
"learning_rate": 1.4684714609976195e-05,
"loss": 1.2668,
"step": 29530
},
{
"epoch": 22.07,
"learning_rate": 1.4670874162652936e-05,
"loss": 1.2646,
"step": 29540
},
{
"epoch": 22.08,
"learning_rate": 1.4657033715329679e-05,
"loss": 1.3091,
"step": 29550
},
{
"epoch": 22.08,
"learning_rate": 1.4643193268006422e-05,
"loss": 1.2798,
"step": 29560
},
{
"epoch": 22.09,
"learning_rate": 1.4629352820683165e-05,
"loss": 1.2459,
"step": 29570
},
{
"epoch": 22.1,
"learning_rate": 1.4615512373359908e-05,
"loss": 1.289,
"step": 29580
},
{
"epoch": 22.11,
"learning_rate": 1.460167192603665e-05,
"loss": 1.2969,
"step": 29590
},
{
"epoch": 22.11,
"learning_rate": 1.4587831478713394e-05,
"loss": 1.3343,
"step": 29600
},
{
"epoch": 22.12,
"learning_rate": 1.4573991031390136e-05,
"loss": 1.281,
"step": 29610
},
{
"epoch": 22.13,
"learning_rate": 1.4560150584066878e-05,
"loss": 1.2457,
"step": 29620
},
{
"epoch": 22.14,
"learning_rate": 1.454631013674362e-05,
"loss": 1.1991,
"step": 29630
},
{
"epoch": 22.14,
"learning_rate": 1.4532469689420364e-05,
"loss": 1.2713,
"step": 29640
},
{
"epoch": 22.15,
"learning_rate": 1.4518629242097107e-05,
"loss": 1.2316,
"step": 29650
},
{
"epoch": 22.16,
"learning_rate": 1.4504788794773846e-05,
"loss": 1.2285,
"step": 29660
},
{
"epoch": 22.17,
"learning_rate": 1.4490948347450589e-05,
"loss": 1.2719,
"step": 29670
},
{
"epoch": 22.17,
"learning_rate": 1.4477107900127332e-05,
"loss": 1.2386,
"step": 29680
},
{
"epoch": 22.18,
"learning_rate": 1.4463267452804075e-05,
"loss": 1.3075,
"step": 29690
},
{
"epoch": 22.19,
"learning_rate": 1.4449427005480818e-05,
"loss": 1.2743,
"step": 29700
},
{
"epoch": 22.2,
"learning_rate": 1.443558655815756e-05,
"loss": 1.2638,
"step": 29710
},
{
"epoch": 22.2,
"learning_rate": 1.4421746110834302e-05,
"loss": 1.2604,
"step": 29720
},
{
"epoch": 22.21,
"learning_rate": 1.4407905663511045e-05,
"loss": 1.2844,
"step": 29730
},
{
"epoch": 22.22,
"learning_rate": 1.4394065216187788e-05,
"loss": 1.2271,
"step": 29740
},
{
"epoch": 22.23,
"learning_rate": 1.4380224768864531e-05,
"loss": 1.2696,
"step": 29750
},
{
"epoch": 22.23,
"learning_rate": 1.4366384321541274e-05,
"loss": 1.2822,
"step": 29760
},
{
"epoch": 22.24,
"learning_rate": 1.4352543874218017e-05,
"loss": 1.2535,
"step": 29770
},
{
"epoch": 22.25,
"learning_rate": 1.4338703426894758e-05,
"loss": 1.302,
"step": 29780
},
{
"epoch": 22.26,
"learning_rate": 1.4324862979571501e-05,
"loss": 1.2856,
"step": 29790
},
{
"epoch": 22.26,
"learning_rate": 1.4311022532248242e-05,
"loss": 1.2909,
"step": 29800
},
{
"epoch": 22.27,
"learning_rate": 1.4297182084924984e-05,
"loss": 1.258,
"step": 29810
},
{
"epoch": 22.28,
"learning_rate": 1.4283341637601727e-05,
"loss": 1.2596,
"step": 29820
},
{
"epoch": 22.29,
"learning_rate": 1.426950119027847e-05,
"loss": 1.2601,
"step": 29830
},
{
"epoch": 22.29,
"learning_rate": 1.4255660742955212e-05,
"loss": 1.2508,
"step": 29840
},
{
"epoch": 22.3,
"learning_rate": 1.4241820295631955e-05,
"loss": 1.2548,
"step": 29850
},
{
"epoch": 22.31,
"learning_rate": 1.4227979848308698e-05,
"loss": 1.2784,
"step": 29860
},
{
"epoch": 22.32,
"learning_rate": 1.4214139400985441e-05,
"loss": 1.2365,
"step": 29870
},
{
"epoch": 22.32,
"learning_rate": 1.4200298953662183e-05,
"loss": 1.2522,
"step": 29880
},
{
"epoch": 22.33,
"learning_rate": 1.4186458506338925e-05,
"loss": 1.2748,
"step": 29890
},
{
"epoch": 22.34,
"learning_rate": 1.4172618059015668e-05,
"loss": 1.2254,
"step": 29900
},
{
"epoch": 22.35,
"learning_rate": 1.4158777611692411e-05,
"loss": 1.2843,
"step": 29910
},
{
"epoch": 22.35,
"learning_rate": 1.4144937164369154e-05,
"loss": 1.24,
"step": 29920
},
{
"epoch": 22.36,
"learning_rate": 1.4131096717045897e-05,
"loss": 1.2525,
"step": 29930
},
{
"epoch": 22.37,
"learning_rate": 1.4117256269722637e-05,
"loss": 1.2791,
"step": 29940
},
{
"epoch": 22.38,
"learning_rate": 1.410341582239938e-05,
"loss": 1.2708,
"step": 29950
},
{
"epoch": 22.38,
"learning_rate": 1.4089575375076123e-05,
"loss": 1.252,
"step": 29960
},
{
"epoch": 22.39,
"learning_rate": 1.4075734927752866e-05,
"loss": 1.3089,
"step": 29970
},
{
"epoch": 22.4,
"learning_rate": 1.4061894480429607e-05,
"loss": 1.2454,
"step": 29980
},
{
"epoch": 22.41,
"learning_rate": 1.404805403310635e-05,
"loss": 1.307,
"step": 29990
},
{
"epoch": 22.41,
"learning_rate": 1.4034213585783093e-05,
"loss": 1.2596,
"step": 30000
},
{
"epoch": 22.42,
"learning_rate": 1.4020373138459836e-05,
"loss": 1.2509,
"step": 30010
},
{
"epoch": 22.43,
"learning_rate": 1.4006532691136579e-05,
"loss": 1.2703,
"step": 30020
},
{
"epoch": 22.44,
"learning_rate": 1.3992692243813322e-05,
"loss": 1.2691,
"step": 30030
},
{
"epoch": 22.44,
"learning_rate": 1.3978851796490065e-05,
"loss": 1.275,
"step": 30040
},
{
"epoch": 22.45,
"learning_rate": 1.3965011349166806e-05,
"loss": 1.2725,
"step": 30050
},
{
"epoch": 22.46,
"learning_rate": 1.3951170901843549e-05,
"loss": 1.3025,
"step": 30060
},
{
"epoch": 22.47,
"learning_rate": 1.3937330454520292e-05,
"loss": 1.2596,
"step": 30070
},
{
"epoch": 22.47,
"learning_rate": 1.3923490007197031e-05,
"loss": 1.2365,
"step": 30080
},
{
"epoch": 22.48,
"learning_rate": 1.3909649559873774e-05,
"loss": 1.2887,
"step": 30090
},
{
"epoch": 22.49,
"learning_rate": 1.3895809112550517e-05,
"loss": 1.2694,
"step": 30100
},
{
"epoch": 22.5,
"learning_rate": 1.388196866522726e-05,
"loss": 1.2649,
"step": 30110
},
{
"epoch": 22.5,
"learning_rate": 1.3868128217904003e-05,
"loss": 1.2559,
"step": 30120
},
{
"epoch": 22.51,
"learning_rate": 1.3854287770580746e-05,
"loss": 1.2759,
"step": 30130
},
{
"epoch": 22.52,
"learning_rate": 1.3840447323257489e-05,
"loss": 1.2584,
"step": 30140
},
{
"epoch": 22.53,
"learning_rate": 1.382660687593423e-05,
"loss": 1.273,
"step": 30150
},
{
"epoch": 22.53,
"learning_rate": 1.3812766428610973e-05,
"loss": 1.2514,
"step": 30160
},
{
"epoch": 22.54,
"learning_rate": 1.3798925981287716e-05,
"loss": 1.292,
"step": 30170
},
{
"epoch": 22.55,
"learning_rate": 1.3785085533964459e-05,
"loss": 1.281,
"step": 30180
},
{
"epoch": 22.56,
"learning_rate": 1.3771245086641202e-05,
"loss": 1.289,
"step": 30190
},
{
"epoch": 22.56,
"learning_rate": 1.3757404639317945e-05,
"loss": 1.2586,
"step": 30200
},
{
"epoch": 22.57,
"learning_rate": 1.3743564191994688e-05,
"loss": 1.2663,
"step": 30210
},
{
"epoch": 22.58,
"learning_rate": 1.3729723744671428e-05,
"loss": 1.278,
"step": 30220
},
{
"epoch": 22.58,
"learning_rate": 1.371588329734817e-05,
"loss": 1.2814,
"step": 30230
},
{
"epoch": 22.59,
"learning_rate": 1.3702042850024913e-05,
"loss": 1.265,
"step": 30240
},
{
"epoch": 22.6,
"learning_rate": 1.3688202402701655e-05,
"loss": 1.2518,
"step": 30250
},
{
"epoch": 22.61,
"learning_rate": 1.3674361955378398e-05,
"loss": 1.2702,
"step": 30260
},
{
"epoch": 22.61,
"learning_rate": 1.366052150805514e-05,
"loss": 1.2774,
"step": 30270
},
{
"epoch": 22.62,
"learning_rate": 1.3646681060731884e-05,
"loss": 1.2215,
"step": 30280
},
{
"epoch": 22.63,
"learning_rate": 1.3632840613408626e-05,
"loss": 1.2403,
"step": 30290
},
{
"epoch": 22.64,
"learning_rate": 1.361900016608537e-05,
"loss": 1.2697,
"step": 30300
},
{
"epoch": 22.64,
"learning_rate": 1.3605159718762112e-05,
"loss": 1.3078,
"step": 30310
},
{
"epoch": 22.65,
"learning_rate": 1.3591319271438854e-05,
"loss": 1.2268,
"step": 30320
},
{
"epoch": 22.66,
"learning_rate": 1.3577478824115597e-05,
"loss": 1.2816,
"step": 30330
},
{
"epoch": 22.67,
"learning_rate": 1.356363837679234e-05,
"loss": 1.269,
"step": 30340
},
{
"epoch": 22.67,
"learning_rate": 1.3549797929469082e-05,
"loss": 1.2678,
"step": 30350
},
{
"epoch": 22.68,
"learning_rate": 1.3535957482145822e-05,
"loss": 1.2331,
"step": 30360
},
{
"epoch": 22.69,
"learning_rate": 1.3522117034822565e-05,
"loss": 1.2652,
"step": 30370
},
{
"epoch": 22.7,
"learning_rate": 1.3508276587499308e-05,
"loss": 1.2558,
"step": 30380
},
{
"epoch": 22.7,
"learning_rate": 1.3494436140176051e-05,
"loss": 1.2921,
"step": 30390
},
{
"epoch": 22.71,
"learning_rate": 1.3480595692852794e-05,
"loss": 1.2919,
"step": 30400
},
{
"epoch": 22.72,
"learning_rate": 1.3466755245529537e-05,
"loss": 1.2742,
"step": 30410
},
{
"epoch": 22.73,
"learning_rate": 1.3452914798206278e-05,
"loss": 1.2925,
"step": 30420
},
{
"epoch": 22.73,
"learning_rate": 1.3439074350883021e-05,
"loss": 1.2689,
"step": 30430
},
{
"epoch": 22.74,
"learning_rate": 1.3425233903559764e-05,
"loss": 1.2552,
"step": 30440
},
{
"epoch": 22.75,
"learning_rate": 1.3411393456236507e-05,
"loss": 1.2819,
"step": 30450
},
{
"epoch": 22.76,
"learning_rate": 1.339755300891325e-05,
"loss": 1.2724,
"step": 30460
},
{
"epoch": 22.76,
"learning_rate": 1.3383712561589993e-05,
"loss": 1.275,
"step": 30470
},
{
"epoch": 22.77,
"learning_rate": 1.3369872114266736e-05,
"loss": 1.2736,
"step": 30480
},
{
"epoch": 22.78,
"learning_rate": 1.3356031666943477e-05,
"loss": 1.2768,
"step": 30490
},
{
"epoch": 22.79,
"learning_rate": 1.3342191219620218e-05,
"loss": 1.2921,
"step": 30500
},
{
"epoch": 22.79,
"learning_rate": 1.332835077229696e-05,
"loss": 1.2446,
"step": 30510
},
{
"epoch": 22.8,
"learning_rate": 1.3314510324973702e-05,
"loss": 1.2779,
"step": 30520
},
{
"epoch": 22.81,
"learning_rate": 1.3300669877650445e-05,
"loss": 1.2653,
"step": 30530
},
{
"epoch": 22.82,
"learning_rate": 1.3286829430327188e-05,
"loss": 1.252,
"step": 30540
},
{
"epoch": 22.82,
"learning_rate": 1.3272988983003931e-05,
"loss": 1.2491,
"step": 30550
},
{
"epoch": 22.83,
"learning_rate": 1.3259148535680674e-05,
"loss": 1.3089,
"step": 30560
},
{
"epoch": 22.84,
"learning_rate": 1.3245308088357417e-05,
"loss": 1.2652,
"step": 30570
},
{
"epoch": 22.85,
"learning_rate": 1.3231467641034158e-05,
"loss": 1.3159,
"step": 30580
},
{
"epoch": 22.85,
"learning_rate": 1.3217627193710901e-05,
"loss": 1.2565,
"step": 30590
},
{
"epoch": 22.86,
"learning_rate": 1.3203786746387644e-05,
"loss": 1.2599,
"step": 30600
},
{
"epoch": 22.87,
"learning_rate": 1.3189946299064387e-05,
"loss": 1.2959,
"step": 30610
},
{
"epoch": 22.88,
"learning_rate": 1.317610585174113e-05,
"loss": 1.2645,
"step": 30620
},
{
"epoch": 22.88,
"learning_rate": 1.316226540441787e-05,
"loss": 1.2598,
"step": 30630
},
{
"epoch": 22.89,
"learning_rate": 1.3148424957094613e-05,
"loss": 1.2318,
"step": 30640
},
{
"epoch": 22.9,
"learning_rate": 1.3134584509771356e-05,
"loss": 1.2806,
"step": 30650
},
{
"epoch": 22.91,
"learning_rate": 1.3120744062448099e-05,
"loss": 1.2577,
"step": 30660
},
{
"epoch": 22.91,
"learning_rate": 1.3106903615124842e-05,
"loss": 1.2378,
"step": 30670
},
{
"epoch": 22.92,
"learning_rate": 1.3093063167801583e-05,
"loss": 1.2762,
"step": 30680
},
{
"epoch": 22.93,
"learning_rate": 1.3079222720478326e-05,
"loss": 1.2653,
"step": 30690
},
{
"epoch": 22.94,
"learning_rate": 1.3065382273155069e-05,
"loss": 1.2695,
"step": 30700
},
{
"epoch": 22.94,
"learning_rate": 1.3051541825831812e-05,
"loss": 1.2823,
"step": 30710
},
{
"epoch": 22.95,
"learning_rate": 1.3037701378508555e-05,
"loss": 1.2837,
"step": 30720
},
{
"epoch": 22.96,
"learning_rate": 1.3023860931185298e-05,
"loss": 1.2792,
"step": 30730
},
{
"epoch": 22.97,
"learning_rate": 1.301002048386204e-05,
"loss": 1.2976,
"step": 30740
},
{
"epoch": 22.97,
"learning_rate": 1.2996180036538782e-05,
"loss": 1.2743,
"step": 30750
},
{
"epoch": 22.98,
"learning_rate": 1.2982339589215525e-05,
"loss": 1.2922,
"step": 30760
},
{
"epoch": 22.99,
"learning_rate": 1.2968499141892266e-05,
"loss": 1.2768,
"step": 30770
},
{
"epoch": 23.0,
"learning_rate": 1.2954658694569007e-05,
"loss": 1.2579,
"step": 30780
},
{
"epoch": 23.0,
"eval_accuracy": 0.6052838909606597,
"eval_loss": 0.9300490021705627,
"eval_runtime": 72.3752,
"eval_samples_per_second": 263.06,
"eval_steps_per_second": 8.221,
"step": 30785
},
{
"epoch": 23.0,
"learning_rate": 1.294081824724575e-05,
"loss": 1.2671,
"step": 30790
},
{
"epoch": 23.01,
"learning_rate": 1.2926977799922493e-05,
"loss": 1.2746,
"step": 30800
},
{
"epoch": 23.02,
"learning_rate": 1.2913137352599236e-05,
"loss": 1.261,
"step": 30810
},
{
"epoch": 23.03,
"learning_rate": 1.2899296905275979e-05,
"loss": 1.2665,
"step": 30820
},
{
"epoch": 23.03,
"learning_rate": 1.2885456457952722e-05,
"loss": 1.2921,
"step": 30830
},
{
"epoch": 23.04,
"learning_rate": 1.2871616010629465e-05,
"loss": 1.2671,
"step": 30840
},
{
"epoch": 23.05,
"learning_rate": 1.2857775563306206e-05,
"loss": 1.2761,
"step": 30850
},
{
"epoch": 23.06,
"learning_rate": 1.2843935115982949e-05,
"loss": 1.2546,
"step": 30860
},
{
"epoch": 23.06,
"learning_rate": 1.2830094668659692e-05,
"loss": 1.2551,
"step": 30870
},
{
"epoch": 23.07,
"learning_rate": 1.2816254221336435e-05,
"loss": 1.2463,
"step": 30880
},
{
"epoch": 23.08,
"learning_rate": 1.2802413774013178e-05,
"loss": 1.261,
"step": 30890
},
{
"epoch": 23.09,
"learning_rate": 1.2788573326689921e-05,
"loss": 1.2753,
"step": 30900
},
{
"epoch": 23.09,
"learning_rate": 1.277473287936666e-05,
"loss": 1.2731,
"step": 30910
},
{
"epoch": 23.1,
"learning_rate": 1.2760892432043403e-05,
"loss": 1.2915,
"step": 30920
},
{
"epoch": 23.11,
"learning_rate": 1.2747051984720146e-05,
"loss": 1.2852,
"step": 30930
},
{
"epoch": 23.12,
"learning_rate": 1.273321153739689e-05,
"loss": 1.2434,
"step": 30940
},
{
"epoch": 23.12,
"learning_rate": 1.271937109007363e-05,
"loss": 1.2582,
"step": 30950
},
{
"epoch": 23.13,
"learning_rate": 1.2705530642750373e-05,
"loss": 1.2561,
"step": 30960
},
{
"epoch": 23.14,
"learning_rate": 1.2691690195427116e-05,
"loss": 1.2489,
"step": 30970
},
{
"epoch": 23.15,
"learning_rate": 1.267784974810386e-05,
"loss": 1.2704,
"step": 30980
},
{
"epoch": 23.15,
"learning_rate": 1.2664009300780602e-05,
"loss": 1.2235,
"step": 30990
},
{
"epoch": 23.16,
"learning_rate": 1.2650168853457345e-05,
"loss": 1.2224,
"step": 31000
},
{
"epoch": 23.17,
"learning_rate": 1.2636328406134088e-05,
"loss": 1.2508,
"step": 31010
},
{
"epoch": 23.18,
"learning_rate": 1.262248795881083e-05,
"loss": 1.2666,
"step": 31020
},
{
"epoch": 23.18,
"learning_rate": 1.2608647511487572e-05,
"loss": 1.2557,
"step": 31030
},
{
"epoch": 23.19,
"learning_rate": 1.2594807064164315e-05,
"loss": 1.2883,
"step": 31040
},
{
"epoch": 23.2,
"learning_rate": 1.2580966616841055e-05,
"loss": 1.2902,
"step": 31050
},
{
"epoch": 23.21,
"learning_rate": 1.2567126169517798e-05,
"loss": 1.2486,
"step": 31060
},
{
"epoch": 23.21,
"learning_rate": 1.255328572219454e-05,
"loss": 1.2386,
"step": 31070
},
{
"epoch": 23.22,
"learning_rate": 1.2539445274871284e-05,
"loss": 1.2687,
"step": 31080
},
{
"epoch": 23.23,
"learning_rate": 1.2525604827548027e-05,
"loss": 1.1929,
"step": 31090
},
{
"epoch": 23.23,
"learning_rate": 1.251176438022477e-05,
"loss": 1.2504,
"step": 31100
},
{
"epoch": 23.24,
"learning_rate": 1.2497923932901513e-05,
"loss": 1.2719,
"step": 31110
},
{
"epoch": 23.25,
"learning_rate": 1.2484083485578254e-05,
"loss": 1.314,
"step": 31120
},
{
"epoch": 23.26,
"learning_rate": 1.2470243038254997e-05,
"loss": 1.2565,
"step": 31130
},
{
"epoch": 23.26,
"learning_rate": 1.245640259093174e-05,
"loss": 1.2509,
"step": 31140
},
{
"epoch": 23.27,
"learning_rate": 1.2442562143608483e-05,
"loss": 1.205,
"step": 31150
},
{
"epoch": 23.28,
"learning_rate": 1.2428721696285224e-05,
"loss": 1.2468,
"step": 31160
},
{
"epoch": 23.29,
"learning_rate": 1.2414881248961967e-05,
"loss": 1.2678,
"step": 31170
},
{
"epoch": 23.29,
"learning_rate": 1.240104080163871e-05,
"loss": 1.2374,
"step": 31180
},
{
"epoch": 23.3,
"learning_rate": 1.2387200354315453e-05,
"loss": 1.2849,
"step": 31190
},
{
"epoch": 23.31,
"learning_rate": 1.2373359906992194e-05,
"loss": 1.2692,
"step": 31200
},
{
"epoch": 23.32,
"learning_rate": 1.2359519459668937e-05,
"loss": 1.2835,
"step": 31210
},
{
"epoch": 23.32,
"learning_rate": 1.2345679012345678e-05,
"loss": 1.2755,
"step": 31220
},
{
"epoch": 23.33,
"learning_rate": 1.2331838565022421e-05,
"loss": 1.2533,
"step": 31230
},
{
"epoch": 23.34,
"learning_rate": 1.2317998117699164e-05,
"loss": 1.264,
"step": 31240
},
{
"epoch": 23.35,
"learning_rate": 1.2304157670375907e-05,
"loss": 1.2503,
"step": 31250
},
{
"epoch": 23.35,
"learning_rate": 1.229031722305265e-05,
"loss": 1.215,
"step": 31260
},
{
"epoch": 23.36,
"learning_rate": 1.2276476775729393e-05,
"loss": 1.271,
"step": 31270
},
{
"epoch": 23.37,
"learning_rate": 1.2262636328406136e-05,
"loss": 1.2658,
"step": 31280
},
{
"epoch": 23.38,
"learning_rate": 1.2248795881082877e-05,
"loss": 1.2674,
"step": 31290
},
{
"epoch": 23.38,
"learning_rate": 1.2234955433759618e-05,
"loss": 1.2248,
"step": 31300
},
{
"epoch": 23.39,
"learning_rate": 1.2221114986436361e-05,
"loss": 1.2544,
"step": 31310
},
{
"epoch": 23.4,
"learning_rate": 1.2207274539113104e-05,
"loss": 1.2549,
"step": 31320
},
{
"epoch": 23.41,
"learning_rate": 1.2193434091789847e-05,
"loss": 1.2385,
"step": 31330
},
{
"epoch": 23.41,
"learning_rate": 1.217959364446659e-05,
"loss": 1.2833,
"step": 31340
},
{
"epoch": 23.42,
"learning_rate": 1.2165753197143333e-05,
"loss": 1.2763,
"step": 31350
},
{
"epoch": 23.43,
"learning_rate": 1.2151912749820074e-05,
"loss": 1.2671,
"step": 31360
},
{
"epoch": 23.44,
"learning_rate": 1.2138072302496817e-05,
"loss": 1.2103,
"step": 31370
},
{
"epoch": 23.44,
"learning_rate": 1.212423185517356e-05,
"loss": 1.2588,
"step": 31380
},
{
"epoch": 23.45,
"learning_rate": 1.2110391407850302e-05,
"loss": 1.2578,
"step": 31390
},
{
"epoch": 23.46,
"learning_rate": 1.2096550960527045e-05,
"loss": 1.2713,
"step": 31400
},
{
"epoch": 23.47,
"learning_rate": 1.2082710513203788e-05,
"loss": 1.251,
"step": 31410
},
{
"epoch": 23.47,
"learning_rate": 1.206887006588053e-05,
"loss": 1.2709,
"step": 31420
},
{
"epoch": 23.48,
"learning_rate": 1.2055029618557272e-05,
"loss": 1.2804,
"step": 31430
},
{
"epoch": 23.49,
"learning_rate": 1.2041189171234015e-05,
"loss": 1.2955,
"step": 31440
},
{
"epoch": 23.5,
"learning_rate": 1.2027348723910758e-05,
"loss": 1.2515,
"step": 31450
},
{
"epoch": 23.5,
"learning_rate": 1.20135082765875e-05,
"loss": 1.2246,
"step": 31460
},
{
"epoch": 23.51,
"learning_rate": 1.1999667829264242e-05,
"loss": 1.2327,
"step": 31470
},
{
"epoch": 23.52,
"learning_rate": 1.1985827381940985e-05,
"loss": 1.2813,
"step": 31480
},
{
"epoch": 23.53,
"learning_rate": 1.1971986934617728e-05,
"loss": 1.2428,
"step": 31490
},
{
"epoch": 23.53,
"learning_rate": 1.1958146487294469e-05,
"loss": 1.2285,
"step": 31500
},
{
"epoch": 23.54,
"learning_rate": 1.1944306039971212e-05,
"loss": 1.2729,
"step": 31510
},
{
"epoch": 23.55,
"learning_rate": 1.1930465592647955e-05,
"loss": 1.2506,
"step": 31520
},
{
"epoch": 23.56,
"learning_rate": 1.1916625145324698e-05,
"loss": 1.2517,
"step": 31530
},
{
"epoch": 23.56,
"learning_rate": 1.190278469800144e-05,
"loss": 1.268,
"step": 31540
},
{
"epoch": 23.57,
"learning_rate": 1.1888944250678182e-05,
"loss": 1.2577,
"step": 31550
},
{
"epoch": 23.58,
"learning_rate": 1.1875103803354925e-05,
"loss": 1.2558,
"step": 31560
},
{
"epoch": 23.59,
"learning_rate": 1.1861263356031666e-05,
"loss": 1.2557,
"step": 31570
},
{
"epoch": 23.59,
"learning_rate": 1.184742290870841e-05,
"loss": 1.2623,
"step": 31580
},
{
"epoch": 23.6,
"learning_rate": 1.1833582461385152e-05,
"loss": 1.2468,
"step": 31590
},
{
"epoch": 23.61,
"learning_rate": 1.1819742014061895e-05,
"loss": 1.2571,
"step": 31600
},
{
"epoch": 23.62,
"learning_rate": 1.1805901566738638e-05,
"loss": 1.2721,
"step": 31610
},
{
"epoch": 23.62,
"learning_rate": 1.1792061119415381e-05,
"loss": 1.2511,
"step": 31620
},
{
"epoch": 23.63,
"learning_rate": 1.1778220672092124e-05,
"loss": 1.2588,
"step": 31630
},
{
"epoch": 23.64,
"learning_rate": 1.1764380224768865e-05,
"loss": 1.302,
"step": 31640
},
{
"epoch": 23.65,
"learning_rate": 1.1750539777445606e-05,
"loss": 1.2476,
"step": 31650
},
{
"epoch": 23.65,
"learning_rate": 1.173669933012235e-05,
"loss": 1.3128,
"step": 31660
},
{
"epoch": 23.66,
"learning_rate": 1.1722858882799092e-05,
"loss": 1.23,
"step": 31670
},
{
"epoch": 23.67,
"learning_rate": 1.1709018435475835e-05,
"loss": 1.3055,
"step": 31680
},
{
"epoch": 23.68,
"learning_rate": 1.1695177988152578e-05,
"loss": 1.2773,
"step": 31690
},
{
"epoch": 23.68,
"learning_rate": 1.1681337540829321e-05,
"loss": 1.2474,
"step": 31700
},
{
"epoch": 23.69,
"learning_rate": 1.1667497093506062e-05,
"loss": 1.2572,
"step": 31710
},
{
"epoch": 23.7,
"learning_rate": 1.1653656646182805e-05,
"loss": 1.2493,
"step": 31720
},
{
"epoch": 23.71,
"learning_rate": 1.1639816198859548e-05,
"loss": 1.2766,
"step": 31730
},
{
"epoch": 23.71,
"learning_rate": 1.162597575153629e-05,
"loss": 1.2587,
"step": 31740
},
{
"epoch": 23.72,
"learning_rate": 1.1612135304213033e-05,
"loss": 1.2237,
"step": 31750
},
{
"epoch": 23.73,
"learning_rate": 1.1598294856889775e-05,
"loss": 1.2358,
"step": 31760
},
{
"epoch": 23.74,
"learning_rate": 1.1584454409566518e-05,
"loss": 1.2404,
"step": 31770
},
{
"epoch": 23.74,
"learning_rate": 1.157061396224326e-05,
"loss": 1.2429,
"step": 31780
},
{
"epoch": 23.75,
"learning_rate": 1.1556773514920003e-05,
"loss": 1.232,
"step": 31790
},
{
"epoch": 23.76,
"learning_rate": 1.1542933067596746e-05,
"loss": 1.2302,
"step": 31800
},
{
"epoch": 23.77,
"learning_rate": 1.1529092620273489e-05,
"loss": 1.2373,
"step": 31810
},
{
"epoch": 23.77,
"learning_rate": 1.151525217295023e-05,
"loss": 1.273,
"step": 31820
},
{
"epoch": 23.78,
"learning_rate": 1.1501411725626973e-05,
"loss": 1.2583,
"step": 31830
},
{
"epoch": 23.79,
"learning_rate": 1.1487571278303716e-05,
"loss": 1.2423,
"step": 31840
},
{
"epoch": 23.8,
"learning_rate": 1.1473730830980457e-05,
"loss": 1.2325,
"step": 31850
},
{
"epoch": 23.8,
"learning_rate": 1.14598903836572e-05,
"loss": 1.2695,
"step": 31860
},
{
"epoch": 23.81,
"learning_rate": 1.1446049936333943e-05,
"loss": 1.2787,
"step": 31870
},
{
"epoch": 23.82,
"learning_rate": 1.1432209489010686e-05,
"loss": 1.2391,
"step": 31880
},
{
"epoch": 23.83,
"learning_rate": 1.1418369041687429e-05,
"loss": 1.2184,
"step": 31890
},
{
"epoch": 23.83,
"learning_rate": 1.1404528594364172e-05,
"loss": 1.255,
"step": 31900
},
{
"epoch": 23.84,
"learning_rate": 1.1390688147040913e-05,
"loss": 1.3011,
"step": 31910
},
{
"epoch": 23.85,
"learning_rate": 1.1376847699717654e-05,
"loss": 1.2918,
"step": 31920
},
{
"epoch": 23.86,
"learning_rate": 1.1363007252394397e-05,
"loss": 1.3301,
"step": 31930
},
{
"epoch": 23.86,
"learning_rate": 1.134916680507114e-05,
"loss": 1.2684,
"step": 31940
},
{
"epoch": 23.87,
"learning_rate": 1.1335326357747883e-05,
"loss": 1.2155,
"step": 31950
},
{
"epoch": 23.88,
"learning_rate": 1.1321485910424626e-05,
"loss": 1.2853,
"step": 31960
},
{
"epoch": 23.88,
"learning_rate": 1.1307645463101369e-05,
"loss": 1.2457,
"step": 31970
},
{
"epoch": 23.89,
"learning_rate": 1.1293805015778112e-05,
"loss": 1.2634,
"step": 31980
},
{
"epoch": 23.9,
"learning_rate": 1.1279964568454853e-05,
"loss": 1.2696,
"step": 31990
},
{
"epoch": 23.91,
"learning_rate": 1.1266124121131594e-05,
"loss": 1.2743,
"step": 32000
},
{
"epoch": 23.91,
"learning_rate": 1.1252283673808337e-05,
"loss": 1.2516,
"step": 32010
},
{
"epoch": 23.92,
"learning_rate": 1.123844322648508e-05,
"loss": 1.2643,
"step": 32020
},
{
"epoch": 23.93,
"learning_rate": 1.1224602779161823e-05,
"loss": 1.2775,
"step": 32030
},
{
"epoch": 23.94,
"learning_rate": 1.1210762331838566e-05,
"loss": 1.2458,
"step": 32040
},
{
"epoch": 23.94,
"learning_rate": 1.1196921884515309e-05,
"loss": 1.2714,
"step": 32050
},
{
"epoch": 23.95,
"learning_rate": 1.118308143719205e-05,
"loss": 1.2449,
"step": 32060
},
{
"epoch": 23.96,
"learning_rate": 1.1169240989868793e-05,
"loss": 1.2823,
"step": 32070
},
{
"epoch": 23.97,
"learning_rate": 1.1155400542545536e-05,
"loss": 1.2785,
"step": 32080
},
{
"epoch": 23.97,
"learning_rate": 1.1141560095222277e-05,
"loss": 1.2525,
"step": 32090
},
{
"epoch": 23.98,
"learning_rate": 1.112771964789902e-05,
"loss": 1.2655,
"step": 32100
},
{
"epoch": 23.99,
"learning_rate": 1.1113879200575763e-05,
"loss": 1.3122,
"step": 32110
},
{
"epoch": 24.0,
"learning_rate": 1.1100038753252506e-05,
"loss": 1.2475,
"step": 32120
},
{
"epoch": 24.0,
"eval_accuracy": 0.6049162245916277,
"eval_loss": 0.9295793771743774,
"eval_runtime": 68.7909,
"eval_samples_per_second": 276.766,
"eval_steps_per_second": 8.649,
"step": 32124
},
{
"epoch": 24.0,
"learning_rate": 1.1086198305929248e-05,
"loss": 1.2502,
"step": 32130
},
{
"epoch": 24.01,
"learning_rate": 1.107235785860599e-05,
"loss": 1.2764,
"step": 32140
},
{
"epoch": 24.02,
"learning_rate": 1.1058517411282733e-05,
"loss": 1.2801,
"step": 32150
},
{
"epoch": 24.03,
"learning_rate": 1.1044676963959476e-05,
"loss": 1.2411,
"step": 32160
},
{
"epoch": 24.03,
"learning_rate": 1.1030836516636218e-05,
"loss": 1.2753,
"step": 32170
},
{
"epoch": 24.04,
"learning_rate": 1.101699606931296e-05,
"loss": 1.2706,
"step": 32180
},
{
"epoch": 24.05,
"learning_rate": 1.1003155621989704e-05,
"loss": 1.2471,
"step": 32190
},
{
"epoch": 24.06,
"learning_rate": 1.0989315174666445e-05,
"loss": 1.2326,
"step": 32200
},
{
"epoch": 24.06,
"learning_rate": 1.0975474727343188e-05,
"loss": 1.2582,
"step": 32210
},
{
"epoch": 24.07,
"learning_rate": 1.096163428001993e-05,
"loss": 1.2592,
"step": 32220
},
{
"epoch": 24.08,
"learning_rate": 1.0947793832696674e-05,
"loss": 1.2321,
"step": 32230
},
{
"epoch": 24.09,
"learning_rate": 1.0933953385373417e-05,
"loss": 1.2602,
"step": 32240
},
{
"epoch": 24.09,
"learning_rate": 1.092011293805016e-05,
"loss": 1.2429,
"step": 32250
},
{
"epoch": 24.1,
"learning_rate": 1.09062724907269e-05,
"loss": 1.2696,
"step": 32260
},
{
"epoch": 24.11,
"learning_rate": 1.0892432043403642e-05,
"loss": 1.2644,
"step": 32270
},
{
"epoch": 24.12,
"learning_rate": 1.0878591596080385e-05,
"loss": 1.2865,
"step": 32280
},
{
"epoch": 24.12,
"learning_rate": 1.0864751148757128e-05,
"loss": 1.2369,
"step": 32290
},
{
"epoch": 24.13,
"learning_rate": 1.0850910701433871e-05,
"loss": 1.2709,
"step": 32300
},
{
"epoch": 24.14,
"learning_rate": 1.0837070254110614e-05,
"loss": 1.2689,
"step": 32310
},
{
"epoch": 24.15,
"learning_rate": 1.0823229806787357e-05,
"loss": 1.2238,
"step": 32320
},
{
"epoch": 24.15,
"learning_rate": 1.08093893594641e-05,
"loss": 1.2108,
"step": 32330
},
{
"epoch": 24.16,
"learning_rate": 1.0795548912140841e-05,
"loss": 1.2876,
"step": 32340
},
{
"epoch": 24.17,
"learning_rate": 1.0781708464817582e-05,
"loss": 1.2402,
"step": 32350
},
{
"epoch": 24.18,
"learning_rate": 1.0767868017494325e-05,
"loss": 1.2453,
"step": 32360
},
{
"epoch": 24.18,
"learning_rate": 1.0754027570171068e-05,
"loss": 1.2228,
"step": 32370
},
{
"epoch": 24.19,
"learning_rate": 1.0740187122847811e-05,
"loss": 1.2633,
"step": 32380
},
{
"epoch": 24.2,
"learning_rate": 1.0726346675524554e-05,
"loss": 1.2915,
"step": 32390
},
{
"epoch": 24.21,
"learning_rate": 1.0712506228201295e-05,
"loss": 1.251,
"step": 32400
},
{
"epoch": 24.21,
"learning_rate": 1.0698665780878038e-05,
"loss": 1.2576,
"step": 32410
},
{
"epoch": 24.22,
"learning_rate": 1.0684825333554781e-05,
"loss": 1.2721,
"step": 32420
},
{
"epoch": 24.23,
"learning_rate": 1.0670984886231524e-05,
"loss": 1.2666,
"step": 32430
},
{
"epoch": 24.24,
"learning_rate": 1.0657144438908265e-05,
"loss": 1.2577,
"step": 32440
},
{
"epoch": 24.24,
"learning_rate": 1.0643303991585008e-05,
"loss": 1.285,
"step": 32450
},
{
"epoch": 24.25,
"learning_rate": 1.0629463544261751e-05,
"loss": 1.22,
"step": 32460
},
{
"epoch": 24.26,
"learning_rate": 1.0615623096938493e-05,
"loss": 1.2553,
"step": 32470
},
{
"epoch": 24.27,
"learning_rate": 1.0601782649615236e-05,
"loss": 1.2501,
"step": 32480
},
{
"epoch": 24.27,
"learning_rate": 1.0587942202291978e-05,
"loss": 1.2742,
"step": 32490
},
{
"epoch": 24.28,
"learning_rate": 1.0574101754968721e-05,
"loss": 1.2808,
"step": 32500
},
{
"epoch": 24.29,
"learning_rate": 1.0560261307645464e-05,
"loss": 1.2419,
"step": 32510
},
{
"epoch": 24.3,
"learning_rate": 1.0546420860322206e-05,
"loss": 1.24,
"step": 32520
},
{
"epoch": 24.3,
"learning_rate": 1.0532580412998949e-05,
"loss": 1.2673,
"step": 32530
},
{
"epoch": 24.31,
"learning_rate": 1.051873996567569e-05,
"loss": 1.2181,
"step": 32540
},
{
"epoch": 24.32,
"learning_rate": 1.0504899518352433e-05,
"loss": 1.2233,
"step": 32550
},
{
"epoch": 24.33,
"learning_rate": 1.0491059071029176e-05,
"loss": 1.2267,
"step": 32560
},
{
"epoch": 24.33,
"learning_rate": 1.0477218623705919e-05,
"loss": 1.2447,
"step": 32570
},
{
"epoch": 24.34,
"learning_rate": 1.0463378176382662e-05,
"loss": 1.2468,
"step": 32580
},
{
"epoch": 24.35,
"learning_rate": 1.0449537729059405e-05,
"loss": 1.2917,
"step": 32590
},
{
"epoch": 24.36,
"learning_rate": 1.0435697281736148e-05,
"loss": 1.2426,
"step": 32600
},
{
"epoch": 24.36,
"learning_rate": 1.0421856834412889e-05,
"loss": 1.249,
"step": 32610
},
{
"epoch": 24.37,
"learning_rate": 1.040801638708963e-05,
"loss": 1.2667,
"step": 32620
},
{
"epoch": 24.38,
"learning_rate": 1.0394175939766373e-05,
"loss": 1.2267,
"step": 32630
},
{
"epoch": 24.39,
"learning_rate": 1.0380335492443116e-05,
"loss": 1.2507,
"step": 32640
},
{
"epoch": 24.39,
"learning_rate": 1.0366495045119859e-05,
"loss": 1.2444,
"step": 32650
},
{
"epoch": 24.4,
"learning_rate": 1.0352654597796602e-05,
"loss": 1.2859,
"step": 32660
},
{
"epoch": 24.41,
"learning_rate": 1.0338814150473345e-05,
"loss": 1.2659,
"step": 32670
},
{
"epoch": 24.42,
"learning_rate": 1.0324973703150086e-05,
"loss": 1.2439,
"step": 32680
},
{
"epoch": 24.42,
"learning_rate": 1.0311133255826829e-05,
"loss": 1.2301,
"step": 32690
},
{
"epoch": 24.43,
"learning_rate": 1.0297292808503572e-05,
"loss": 1.2278,
"step": 32700
},
{
"epoch": 24.44,
"learning_rate": 1.0283452361180313e-05,
"loss": 1.3042,
"step": 32710
},
{
"epoch": 24.45,
"learning_rate": 1.0269611913857056e-05,
"loss": 1.2782,
"step": 32720
},
{
"epoch": 24.45,
"learning_rate": 1.0255771466533799e-05,
"loss": 1.2542,
"step": 32730
},
{
"epoch": 24.46,
"learning_rate": 1.0241931019210542e-05,
"loss": 1.2658,
"step": 32740
},
{
"epoch": 24.47,
"learning_rate": 1.0228090571887283e-05,
"loss": 1.247,
"step": 32750
},
{
"epoch": 24.48,
"learning_rate": 1.0214250124564026e-05,
"loss": 1.2279,
"step": 32760
},
{
"epoch": 24.48,
"learning_rate": 1.020040967724077e-05,
"loss": 1.2553,
"step": 32770
},
{
"epoch": 24.49,
"learning_rate": 1.0186569229917512e-05,
"loss": 1.2938,
"step": 32780
},
{
"epoch": 24.5,
"learning_rate": 1.0172728782594253e-05,
"loss": 1.2549,
"step": 32790
},
{
"epoch": 24.51,
"learning_rate": 1.0158888335270996e-05,
"loss": 1.2497,
"step": 32800
},
{
"epoch": 24.51,
"learning_rate": 1.014504788794774e-05,
"loss": 1.2515,
"step": 32810
},
{
"epoch": 24.52,
"learning_rate": 1.013120744062448e-05,
"loss": 1.2567,
"step": 32820
},
{
"epoch": 24.53,
"learning_rate": 1.0117366993301223e-05,
"loss": 1.2276,
"step": 32830
},
{
"epoch": 24.53,
"learning_rate": 1.0103526545977966e-05,
"loss": 1.293,
"step": 32840
},
{
"epoch": 24.54,
"learning_rate": 1.008968609865471e-05,
"loss": 1.2822,
"step": 32850
},
{
"epoch": 24.55,
"learning_rate": 1.0075845651331452e-05,
"loss": 1.2761,
"step": 32860
},
{
"epoch": 24.56,
"learning_rate": 1.0062005204008194e-05,
"loss": 1.2373,
"step": 32870
},
{
"epoch": 24.56,
"learning_rate": 1.0048164756684937e-05,
"loss": 1.2374,
"step": 32880
},
{
"epoch": 24.57,
"learning_rate": 1.0034324309361678e-05,
"loss": 1.2752,
"step": 32890
},
{
"epoch": 24.58,
"learning_rate": 1.002048386203842e-05,
"loss": 1.2778,
"step": 32900
},
{
"epoch": 24.59,
"learning_rate": 1.0006643414715164e-05,
"loss": 1.2981,
"step": 32910
},
{
"epoch": 24.59,
"learning_rate": 9.992802967391907e-06,
"loss": 1.2649,
"step": 32920
},
{
"epoch": 24.6,
"learning_rate": 9.97896252006865e-06,
"loss": 1.2562,
"step": 32930
},
{
"epoch": 24.61,
"learning_rate": 9.965122072745393e-06,
"loss": 1.2204,
"step": 32940
},
{
"epoch": 24.62,
"learning_rate": 9.951281625422135e-06,
"loss": 1.2437,
"step": 32950
},
{
"epoch": 24.62,
"learning_rate": 9.937441178098877e-06,
"loss": 1.2192,
"step": 32960
},
{
"epoch": 24.63,
"learning_rate": 9.923600730775618e-06,
"loss": 1.2449,
"step": 32970
},
{
"epoch": 24.64,
"learning_rate": 9.909760283452361e-06,
"loss": 1.2744,
"step": 32980
},
{
"epoch": 24.65,
"learning_rate": 9.895919836129104e-06,
"loss": 1.249,
"step": 32990
},
{
"epoch": 24.65,
"learning_rate": 9.882079388805847e-06,
"loss": 1.2626,
"step": 33000
},
{
"epoch": 24.66,
"learning_rate": 9.86823894148259e-06,
"loss": 1.2343,
"step": 33010
},
{
"epoch": 24.67,
"learning_rate": 9.854398494159333e-06,
"loss": 1.2243,
"step": 33020
},
{
"epoch": 24.68,
"learning_rate": 9.840558046836074e-06,
"loss": 1.2647,
"step": 33030
},
{
"epoch": 24.68,
"learning_rate": 9.826717599512817e-06,
"loss": 1.2399,
"step": 33040
},
{
"epoch": 24.69,
"learning_rate": 9.81287715218956e-06,
"loss": 1.208,
"step": 33050
},
{
"epoch": 24.7,
"learning_rate": 9.799036704866301e-06,
"loss": 1.2358,
"step": 33060
},
{
"epoch": 24.71,
"learning_rate": 9.785196257543044e-06,
"loss": 1.2594,
"step": 33070
},
{
"epoch": 24.71,
"learning_rate": 9.771355810219787e-06,
"loss": 1.2801,
"step": 33080
},
{
"epoch": 24.72,
"learning_rate": 9.75751536289653e-06,
"loss": 1.2273,
"step": 33090
},
{
"epoch": 24.73,
"learning_rate": 9.743674915573271e-06,
"loss": 1.2545,
"step": 33100
},
{
"epoch": 24.74,
"learning_rate": 9.729834468250014e-06,
"loss": 1.3075,
"step": 33110
},
{
"epoch": 24.74,
"learning_rate": 9.715994020926757e-06,
"loss": 1.2681,
"step": 33120
},
{
"epoch": 24.75,
"learning_rate": 9.7021535736035e-06,
"loss": 1.2667,
"step": 33130
},
{
"epoch": 24.76,
"learning_rate": 9.688313126280241e-06,
"loss": 1.2963,
"step": 33140
},
{
"epoch": 24.77,
"learning_rate": 9.674472678956984e-06,
"loss": 1.2722,
"step": 33150
},
{
"epoch": 24.77,
"learning_rate": 9.660632231633727e-06,
"loss": 1.304,
"step": 33160
},
{
"epoch": 24.78,
"learning_rate": 9.646791784310468e-06,
"loss": 1.2164,
"step": 33170
},
{
"epoch": 24.79,
"learning_rate": 9.632951336987211e-06,
"loss": 1.2352,
"step": 33180
},
{
"epoch": 24.8,
"learning_rate": 9.619110889663954e-06,
"loss": 1.2484,
"step": 33190
},
{
"epoch": 24.8,
"learning_rate": 9.605270442340697e-06,
"loss": 1.2396,
"step": 33200
},
{
"epoch": 24.81,
"learning_rate": 9.59142999501744e-06,
"loss": 1.2758,
"step": 33210
},
{
"epoch": 24.82,
"learning_rate": 9.577589547694183e-06,
"loss": 1.262,
"step": 33220
},
{
"epoch": 24.83,
"learning_rate": 9.563749100370924e-06,
"loss": 1.2827,
"step": 33230
},
{
"epoch": 24.83,
"learning_rate": 9.549908653047666e-06,
"loss": 1.2595,
"step": 33240
},
{
"epoch": 24.84,
"learning_rate": 9.536068205724409e-06,
"loss": 1.2552,
"step": 33250
},
{
"epoch": 24.85,
"learning_rate": 9.522227758401152e-06,
"loss": 1.2403,
"step": 33260
},
{
"epoch": 24.86,
"learning_rate": 9.508387311077895e-06,
"loss": 1.2615,
"step": 33270
},
{
"epoch": 24.86,
"learning_rate": 9.494546863754637e-06,
"loss": 1.2424,
"step": 33280
},
{
"epoch": 24.87,
"learning_rate": 9.48070641643138e-06,
"loss": 1.2103,
"step": 33290
},
{
"epoch": 24.88,
"learning_rate": 9.466865969108123e-06,
"loss": 1.215,
"step": 33300
},
{
"epoch": 24.89,
"learning_rate": 9.453025521784865e-06,
"loss": 1.2221,
"step": 33310
},
{
"epoch": 24.89,
"learning_rate": 9.439185074461606e-06,
"loss": 1.3008,
"step": 33320
},
{
"epoch": 24.9,
"learning_rate": 9.425344627138349e-06,
"loss": 1.2704,
"step": 33330
},
{
"epoch": 24.91,
"learning_rate": 9.411504179815092e-06,
"loss": 1.2721,
"step": 33340
},
{
"epoch": 24.92,
"learning_rate": 9.397663732491835e-06,
"loss": 1.2304,
"step": 33350
},
{
"epoch": 24.92,
"learning_rate": 9.383823285168578e-06,
"loss": 1.2888,
"step": 33360
},
{
"epoch": 24.93,
"learning_rate": 9.36998283784532e-06,
"loss": 1.2386,
"step": 33370
},
{
"epoch": 24.94,
"learning_rate": 9.356142390522062e-06,
"loss": 1.2676,
"step": 33380
},
{
"epoch": 24.95,
"learning_rate": 9.342301943198805e-06,
"loss": 1.2608,
"step": 33390
},
{
"epoch": 24.95,
"learning_rate": 9.328461495875548e-06,
"loss": 1.2522,
"step": 33400
},
{
"epoch": 24.96,
"learning_rate": 9.314621048552289e-06,
"loss": 1.2391,
"step": 33410
},
{
"epoch": 24.97,
"learning_rate": 9.300780601229032e-06,
"loss": 1.2396,
"step": 33420
},
{
"epoch": 24.98,
"learning_rate": 9.286940153905775e-06,
"loss": 1.2406,
"step": 33430
},
{
"epoch": 24.98,
"learning_rate": 9.273099706582518e-06,
"loss": 1.2825,
"step": 33440
},
{
"epoch": 24.99,
"learning_rate": 9.259259259259259e-06,
"loss": 1.2347,
"step": 33450
},
{
"epoch": 25.0,
"learning_rate": 9.245418811936002e-06,
"loss": 1.2227,
"step": 33460
},
{
"epoch": 25.0,
"eval_accuracy": 0.6078575555438837,
"eval_loss": 0.9317153096199036,
"eval_runtime": 70.7651,
"eval_samples_per_second": 269.045,
"eval_steps_per_second": 8.408,
"step": 33462
},
{
"epoch": 25.01,
"learning_rate": 9.231578364612745e-06,
"loss": 1.2511,
"step": 33470
},
{
"epoch": 25.01,
"learning_rate": 9.217737917289488e-06,
"loss": 1.2987,
"step": 33480
},
{
"epoch": 25.02,
"learning_rate": 9.20389746996623e-06,
"loss": 1.2638,
"step": 33490
},
{
"epoch": 25.03,
"learning_rate": 9.190057022642972e-06,
"loss": 1.2553,
"step": 33500
},
{
"epoch": 25.04,
"learning_rate": 9.176216575319715e-06,
"loss": 1.2454,
"step": 33510
},
{
"epoch": 25.04,
"learning_rate": 9.162376127996456e-06,
"loss": 1.2998,
"step": 33520
},
{
"epoch": 25.05,
"learning_rate": 9.1485356806732e-06,
"loss": 1.2463,
"step": 33530
},
{
"epoch": 25.06,
"learning_rate": 9.134695233349942e-06,
"loss": 1.2441,
"step": 33540
},
{
"epoch": 25.07,
"learning_rate": 9.120854786026685e-06,
"loss": 1.2542,
"step": 33550
},
{
"epoch": 25.07,
"learning_rate": 9.107014338703428e-06,
"loss": 1.2571,
"step": 33560
},
{
"epoch": 25.08,
"learning_rate": 9.093173891380171e-06,
"loss": 1.2892,
"step": 33570
},
{
"epoch": 25.09,
"learning_rate": 9.079333444056912e-06,
"loss": 1.2367,
"step": 33580
},
{
"epoch": 25.1,
"learning_rate": 9.065492996733654e-06,
"loss": 1.26,
"step": 33590
},
{
"epoch": 25.1,
"learning_rate": 9.051652549410397e-06,
"loss": 1.2206,
"step": 33600
},
{
"epoch": 25.11,
"learning_rate": 9.03781210208714e-06,
"loss": 1.237,
"step": 33610
},
{
"epoch": 25.12,
"learning_rate": 9.023971654763882e-06,
"loss": 1.2736,
"step": 33620
},
{
"epoch": 25.13,
"learning_rate": 9.010131207440625e-06,
"loss": 1.2478,
"step": 33630
},
{
"epoch": 25.13,
"learning_rate": 8.996290760117368e-06,
"loss": 1.271,
"step": 33640
},
{
"epoch": 25.14,
"learning_rate": 8.98245031279411e-06,
"loss": 1.2563,
"step": 33650
},
{
"epoch": 25.15,
"learning_rate": 8.968609865470853e-06,
"loss": 1.2709,
"step": 33660
},
{
"epoch": 25.16,
"learning_rate": 8.954769418147596e-06,
"loss": 1.2366,
"step": 33670
},
{
"epoch": 25.16,
"learning_rate": 8.940928970824337e-06,
"loss": 1.2337,
"step": 33680
},
{
"epoch": 25.17,
"learning_rate": 8.92708852350108e-06,
"loss": 1.2349,
"step": 33690
},
{
"epoch": 25.18,
"learning_rate": 8.913248076177823e-06,
"loss": 1.2677,
"step": 33700
},
{
"epoch": 25.18,
"learning_rate": 8.899407628854566e-06,
"loss": 1.2356,
"step": 33710
},
{
"epoch": 25.19,
"learning_rate": 8.885567181531307e-06,
"loss": 1.2568,
"step": 33720
},
{
"epoch": 25.2,
"learning_rate": 8.87172673420805e-06,
"loss": 1.2308,
"step": 33730
},
{
"epoch": 25.21,
"learning_rate": 8.857886286884793e-06,
"loss": 1.2382,
"step": 33740
},
{
"epoch": 25.21,
"learning_rate": 8.844045839561536e-06,
"loss": 1.2827,
"step": 33750
},
{
"epoch": 25.22,
"learning_rate": 8.830205392238277e-06,
"loss": 1.243,
"step": 33760
},
{
"epoch": 25.23,
"learning_rate": 8.81636494491502e-06,
"loss": 1.2561,
"step": 33770
},
{
"epoch": 25.24,
"learning_rate": 8.802524497591763e-06,
"loss": 1.2482,
"step": 33780
},
{
"epoch": 25.24,
"learning_rate": 8.788684050268504e-06,
"loss": 1.2306,
"step": 33790
},
{
"epoch": 25.25,
"learning_rate": 8.774843602945247e-06,
"loss": 1.2397,
"step": 33800
},
{
"epoch": 25.26,
"learning_rate": 8.76100315562199e-06,
"loss": 1.2433,
"step": 33810
},
{
"epoch": 25.27,
"learning_rate": 8.747162708298733e-06,
"loss": 1.234,
"step": 33820
},
{
"epoch": 25.27,
"learning_rate": 8.733322260975476e-06,
"loss": 1.2543,
"step": 33830
},
{
"epoch": 25.28,
"learning_rate": 8.719481813652217e-06,
"loss": 1.265,
"step": 33840
},
{
"epoch": 25.29,
"learning_rate": 8.70564136632896e-06,
"loss": 1.2445,
"step": 33850
},
{
"epoch": 25.3,
"learning_rate": 8.691800919005701e-06,
"loss": 1.231,
"step": 33860
},
{
"epoch": 25.3,
"learning_rate": 8.677960471682444e-06,
"loss": 1.2122,
"step": 33870
},
{
"epoch": 25.31,
"learning_rate": 8.664120024359187e-06,
"loss": 1.2288,
"step": 33880
},
{
"epoch": 25.32,
"learning_rate": 8.65027957703593e-06,
"loss": 1.233,
"step": 33890
},
{
"epoch": 25.33,
"learning_rate": 8.636439129712673e-06,
"loss": 1.2791,
"step": 33900
},
{
"epoch": 25.33,
"learning_rate": 8.622598682389416e-06,
"loss": 1.2086,
"step": 33910
},
{
"epoch": 25.34,
"learning_rate": 8.608758235066159e-06,
"loss": 1.2275,
"step": 33920
},
{
"epoch": 25.35,
"learning_rate": 8.5949177877429e-06,
"loss": 1.2507,
"step": 33930
},
{
"epoch": 25.36,
"learning_rate": 8.581077340419642e-06,
"loss": 1.2637,
"step": 33940
},
{
"epoch": 25.36,
"learning_rate": 8.567236893096385e-06,
"loss": 1.2623,
"step": 33950
},
{
"epoch": 25.37,
"learning_rate": 8.553396445773127e-06,
"loss": 1.28,
"step": 33960
},
{
"epoch": 25.38,
"learning_rate": 8.53955599844987e-06,
"loss": 1.2408,
"step": 33970
},
{
"epoch": 25.39,
"learning_rate": 8.525715551126613e-06,
"loss": 1.2583,
"step": 33980
},
{
"epoch": 25.39,
"learning_rate": 8.511875103803356e-06,
"loss": 1.2483,
"step": 33990
},
{
"epoch": 25.4,
"learning_rate": 8.498034656480098e-06,
"loss": 1.2474,
"step": 34000
},
{
"epoch": 25.41,
"learning_rate": 8.48419420915684e-06,
"loss": 1.2107,
"step": 34010
},
{
"epoch": 25.42,
"learning_rate": 8.470353761833583e-06,
"loss": 1.2641,
"step": 34020
},
{
"epoch": 25.42,
"learning_rate": 8.456513314510325e-06,
"loss": 1.2886,
"step": 34030
},
{
"epoch": 25.43,
"learning_rate": 8.442672867187068e-06,
"loss": 1.2945,
"step": 34040
},
{
"epoch": 25.44,
"learning_rate": 8.42883241986381e-06,
"loss": 1.2671,
"step": 34050
},
{
"epoch": 25.45,
"learning_rate": 8.414991972540554e-06,
"loss": 1.2546,
"step": 34060
},
{
"epoch": 25.45,
"learning_rate": 8.401151525217295e-06,
"loss": 1.2347,
"step": 34070
},
{
"epoch": 25.46,
"learning_rate": 8.387311077894038e-06,
"loss": 1.2016,
"step": 34080
},
{
"epoch": 25.47,
"learning_rate": 8.37347063057078e-06,
"loss": 1.2132,
"step": 34090
},
{
"epoch": 25.48,
"learning_rate": 8.359630183247524e-06,
"loss": 1.2803,
"step": 34100
},
{
"epoch": 25.48,
"learning_rate": 8.345789735924265e-06,
"loss": 1.2468,
"step": 34110
},
{
"epoch": 25.49,
"learning_rate": 8.331949288601008e-06,
"loss": 1.2767,
"step": 34120
},
{
"epoch": 25.5,
"learning_rate": 8.31810884127775e-06,
"loss": 1.2663,
"step": 34130
},
{
"epoch": 25.51,
"learning_rate": 8.304268393954492e-06,
"loss": 1.2745,
"step": 34140
},
{
"epoch": 25.51,
"learning_rate": 8.290427946631235e-06,
"loss": 1.2803,
"step": 34150
},
{
"epoch": 25.52,
"learning_rate": 8.276587499307978e-06,
"loss": 1.2353,
"step": 34160
},
{
"epoch": 25.53,
"learning_rate": 8.262747051984721e-06,
"loss": 1.2398,
"step": 34170
},
{
"epoch": 25.54,
"learning_rate": 8.248906604661464e-06,
"loss": 1.2491,
"step": 34180
},
{
"epoch": 25.54,
"learning_rate": 8.235066157338207e-06,
"loss": 1.2216,
"step": 34190
},
{
"epoch": 25.55,
"learning_rate": 8.221225710014948e-06,
"loss": 1.2579,
"step": 34200
},
{
"epoch": 25.56,
"learning_rate": 8.20738526269169e-06,
"loss": 1.217,
"step": 34210
},
{
"epoch": 25.57,
"learning_rate": 8.193544815368432e-06,
"loss": 1.2225,
"step": 34220
},
{
"epoch": 25.57,
"learning_rate": 8.179704368045175e-06,
"loss": 1.2732,
"step": 34230
},
{
"epoch": 25.58,
"learning_rate": 8.165863920721918e-06,
"loss": 1.247,
"step": 34240
},
{
"epoch": 25.59,
"learning_rate": 8.152023473398661e-06,
"loss": 1.2676,
"step": 34250
},
{
"epoch": 25.6,
"learning_rate": 8.138183026075404e-06,
"loss": 1.2464,
"step": 34260
},
{
"epoch": 25.6,
"learning_rate": 8.124342578752147e-06,
"loss": 1.2829,
"step": 34270
},
{
"epoch": 25.61,
"learning_rate": 8.110502131428888e-06,
"loss": 1.2518,
"step": 34280
},
{
"epoch": 25.62,
"learning_rate": 8.09666168410563e-06,
"loss": 1.2586,
"step": 34290
},
{
"epoch": 25.63,
"learning_rate": 8.082821236782372e-06,
"loss": 1.2814,
"step": 34300
},
{
"epoch": 25.63,
"learning_rate": 8.068980789459115e-06,
"loss": 1.2144,
"step": 34310
},
{
"epoch": 25.64,
"learning_rate": 8.055140342135858e-06,
"loss": 1.2559,
"step": 34320
},
{
"epoch": 25.65,
"learning_rate": 8.041299894812601e-06,
"loss": 1.2057,
"step": 34330
},
{
"epoch": 25.66,
"learning_rate": 8.027459447489344e-06,
"loss": 1.2684,
"step": 34340
},
{
"epoch": 25.66,
"learning_rate": 8.013619000166085e-06,
"loss": 1.2219,
"step": 34350
},
{
"epoch": 25.67,
"learning_rate": 7.999778552842828e-06,
"loss": 1.2505,
"step": 34360
},
{
"epoch": 25.68,
"learning_rate": 7.985938105519571e-06,
"loss": 1.212,
"step": 34370
},
{
"epoch": 25.69,
"learning_rate": 7.972097658196313e-06,
"loss": 1.2268,
"step": 34380
},
{
"epoch": 25.69,
"learning_rate": 7.958257210873056e-06,
"loss": 1.2596,
"step": 34390
},
{
"epoch": 25.7,
"learning_rate": 7.944416763549799e-06,
"loss": 1.2216,
"step": 34400
},
{
"epoch": 25.71,
"learning_rate": 7.930576316226541e-06,
"loss": 1.2665,
"step": 34410
},
{
"epoch": 25.72,
"learning_rate": 7.916735868903283e-06,
"loss": 1.2708,
"step": 34420
},
{
"epoch": 25.72,
"learning_rate": 7.902895421580026e-06,
"loss": 1.3202,
"step": 34430
},
{
"epoch": 25.73,
"learning_rate": 7.889054974256769e-06,
"loss": 1.2381,
"step": 34440
},
{
"epoch": 25.74,
"learning_rate": 7.875214526933512e-06,
"loss": 1.2456,
"step": 34450
},
{
"epoch": 25.75,
"learning_rate": 7.861374079610253e-06,
"loss": 1.2061,
"step": 34460
},
{
"epoch": 25.75,
"learning_rate": 7.847533632286996e-06,
"loss": 1.2553,
"step": 34470
},
{
"epoch": 25.76,
"learning_rate": 7.833693184963739e-06,
"loss": 1.2826,
"step": 34480
},
{
"epoch": 25.77,
"learning_rate": 7.81985273764048e-06,
"loss": 1.2802,
"step": 34490
},
{
"epoch": 25.78,
"learning_rate": 7.806012290317223e-06,
"loss": 1.2307,
"step": 34500
},
{
"epoch": 25.78,
"learning_rate": 7.792171842993966e-06,
"loss": 1.265,
"step": 34510
},
{
"epoch": 25.79,
"learning_rate": 7.778331395670709e-06,
"loss": 1.2557,
"step": 34520
},
{
"epoch": 25.8,
"learning_rate": 7.764490948347452e-06,
"loss": 1.2836,
"step": 34530
},
{
"epoch": 25.81,
"learning_rate": 7.750650501024195e-06,
"loss": 1.2346,
"step": 34540
},
{
"epoch": 25.81,
"learning_rate": 7.736810053700936e-06,
"loss": 1.2404,
"step": 34550
},
{
"epoch": 25.82,
"learning_rate": 7.722969606377677e-06,
"loss": 1.3049,
"step": 34560
},
{
"epoch": 25.83,
"learning_rate": 7.70912915905442e-06,
"loss": 1.2371,
"step": 34570
},
{
"epoch": 25.83,
"learning_rate": 7.695288711731163e-06,
"loss": 1.2586,
"step": 34580
},
{
"epoch": 25.84,
"learning_rate": 7.681448264407906e-06,
"loss": 1.2417,
"step": 34590
},
{
"epoch": 25.85,
"learning_rate": 7.667607817084649e-06,
"loss": 1.2587,
"step": 34600
},
{
"epoch": 25.86,
"learning_rate": 7.653767369761392e-06,
"loss": 1.255,
"step": 34610
},
{
"epoch": 25.86,
"learning_rate": 7.639926922438135e-06,
"loss": 1.2483,
"step": 34620
},
{
"epoch": 25.87,
"learning_rate": 7.626086475114875e-06,
"loss": 1.2278,
"step": 34630
},
{
"epoch": 25.88,
"learning_rate": 7.612246027791618e-06,
"loss": 1.2501,
"step": 34640
},
{
"epoch": 25.89,
"learning_rate": 7.598405580468361e-06,
"loss": 1.2408,
"step": 34650
},
{
"epoch": 25.89,
"learning_rate": 7.584565133145103e-06,
"loss": 1.2558,
"step": 34660
},
{
"epoch": 25.9,
"learning_rate": 7.570724685821846e-06,
"loss": 1.2684,
"step": 34670
},
{
"epoch": 25.91,
"learning_rate": 7.556884238498589e-06,
"loss": 1.2743,
"step": 34680
},
{
"epoch": 25.92,
"learning_rate": 7.543043791175332e-06,
"loss": 1.265,
"step": 34690
},
{
"epoch": 25.92,
"learning_rate": 7.5292033438520734e-06,
"loss": 1.2723,
"step": 34700
},
{
"epoch": 25.93,
"learning_rate": 7.5153628965288155e-06,
"loss": 1.243,
"step": 34710
},
{
"epoch": 25.94,
"learning_rate": 7.5015224492055585e-06,
"loss": 1.2337,
"step": 34720
},
{
"epoch": 25.95,
"learning_rate": 7.4876820018823014e-06,
"loss": 1.2842,
"step": 34730
},
{
"epoch": 25.95,
"learning_rate": 7.473841554559044e-06,
"loss": 1.2463,
"step": 34740
},
{
"epoch": 25.96,
"learning_rate": 7.4600011072357865e-06,
"loss": 1.2482,
"step": 34750
},
{
"epoch": 25.97,
"learning_rate": 7.446160659912528e-06,
"loss": 1.2865,
"step": 34760
},
{
"epoch": 25.98,
"learning_rate": 7.432320212589271e-06,
"loss": 1.2567,
"step": 34770
},
{
"epoch": 25.98,
"learning_rate": 7.418479765266014e-06,
"loss": 1.2669,
"step": 34780
},
{
"epoch": 25.99,
"learning_rate": 7.404639317942757e-06,
"loss": 1.2718,
"step": 34790
},
{
"epoch": 26.0,
"learning_rate": 7.390798870619499e-06,
"loss": 1.2069,
"step": 34800
},
{
"epoch": 26.0,
"eval_accuracy": 0.5886863805872157,
"eval_loss": 0.9608703851699829,
"eval_runtime": 71.8493,
"eval_samples_per_second": 264.985,
"eval_steps_per_second": 8.281,
"step": 34801
},
{
"epoch": 26.01,
"learning_rate": 7.376958423296242e-06,
"loss": 1.2438,
"step": 34810
},
{
"epoch": 26.01,
"learning_rate": 7.3631179759729846e-06,
"loss": 1.2528,
"step": 34820
},
{
"epoch": 26.02,
"learning_rate": 7.349277528649726e-06,
"loss": 1.2268,
"step": 34830
},
{
"epoch": 26.03,
"learning_rate": 7.335437081326468e-06,
"loss": 1.2431,
"step": 34840
},
{
"epoch": 26.04,
"learning_rate": 7.321596634003211e-06,
"loss": 1.2359,
"step": 34850
},
{
"epoch": 26.04,
"learning_rate": 7.307756186679954e-06,
"loss": 1.2005,
"step": 34860
},
{
"epoch": 26.05,
"learning_rate": 7.293915739356697e-06,
"loss": 1.2221,
"step": 34870
},
{
"epoch": 26.06,
"learning_rate": 7.280075292033439e-06,
"loss": 1.232,
"step": 34880
},
{
"epoch": 26.07,
"learning_rate": 7.266234844710182e-06,
"loss": 1.2136,
"step": 34890
},
{
"epoch": 26.07,
"learning_rate": 7.252394397386923e-06,
"loss": 1.2738,
"step": 34900
},
{
"epoch": 26.08,
"learning_rate": 7.238553950063666e-06,
"loss": 1.2284,
"step": 34910
},
{
"epoch": 26.09,
"learning_rate": 7.224713502740409e-06,
"loss": 1.2845,
"step": 34920
},
{
"epoch": 26.1,
"learning_rate": 7.210873055417151e-06,
"loss": 1.2323,
"step": 34930
},
{
"epoch": 26.1,
"learning_rate": 7.197032608093894e-06,
"loss": 1.2208,
"step": 34940
},
{
"epoch": 26.11,
"learning_rate": 7.183192160770637e-06,
"loss": 1.2409,
"step": 34950
},
{
"epoch": 26.12,
"learning_rate": 7.169351713447379e-06,
"loss": 1.2279,
"step": 34960
},
{
"epoch": 26.13,
"learning_rate": 7.155511266124121e-06,
"loss": 1.2108,
"step": 34970
},
{
"epoch": 26.13,
"learning_rate": 7.141670818800863e-06,
"loss": 1.2808,
"step": 34980
},
{
"epoch": 26.14,
"learning_rate": 7.127830371477606e-06,
"loss": 1.232,
"step": 34990
},
{
"epoch": 26.15,
"learning_rate": 7.113989924154349e-06,
"loss": 1.2387,
"step": 35000
},
{
"epoch": 26.16,
"learning_rate": 7.100149476831091e-06,
"loss": 1.279,
"step": 35010
},
{
"epoch": 26.16,
"learning_rate": 7.086309029507834e-06,
"loss": 1.2562,
"step": 35020
},
{
"epoch": 26.17,
"learning_rate": 7.072468582184577e-06,
"loss": 1.244,
"step": 35030
},
{
"epoch": 26.18,
"learning_rate": 7.058628134861318e-06,
"loss": 1.249,
"step": 35040
},
{
"epoch": 26.19,
"learning_rate": 7.044787687538061e-06,
"loss": 1.2739,
"step": 35050
},
{
"epoch": 26.19,
"learning_rate": 7.0309472402148035e-06,
"loss": 1.2145,
"step": 35060
},
{
"epoch": 26.2,
"learning_rate": 7.017106792891546e-06,
"loss": 1.2437,
"step": 35070
},
{
"epoch": 26.21,
"learning_rate": 7.003266345568289e-06,
"loss": 1.2449,
"step": 35080
},
{
"epoch": 26.22,
"learning_rate": 6.989425898245032e-06,
"loss": 1.293,
"step": 35090
},
{
"epoch": 26.22,
"learning_rate": 6.975585450921774e-06,
"loss": 1.2486,
"step": 35100
},
{
"epoch": 26.23,
"learning_rate": 6.961745003598516e-06,
"loss": 1.2462,
"step": 35110
},
{
"epoch": 26.24,
"learning_rate": 6.947904556275259e-06,
"loss": 1.2662,
"step": 35120
},
{
"epoch": 26.25,
"learning_rate": 6.9340641089520016e-06,
"loss": 1.2589,
"step": 35130
},
{
"epoch": 26.25,
"learning_rate": 6.9202236616287445e-06,
"loss": 1.2743,
"step": 35140
},
{
"epoch": 26.26,
"learning_rate": 6.906383214305487e-06,
"loss": 1.2343,
"step": 35150
},
{
"epoch": 26.27,
"learning_rate": 6.8925427669822296e-06,
"loss": 1.2456,
"step": 35160
},
{
"epoch": 26.28,
"learning_rate": 6.8787023196589725e-06,
"loss": 1.2528,
"step": 35170
},
{
"epoch": 26.28,
"learning_rate": 6.864861872335714e-06,
"loss": 1.2213,
"step": 35180
},
{
"epoch": 26.29,
"learning_rate": 6.851021425012457e-06,
"loss": 1.2059,
"step": 35190
},
{
"epoch": 26.3,
"learning_rate": 6.837180977689199e-06,
"loss": 1.2495,
"step": 35200
},
{
"epoch": 26.31,
"learning_rate": 6.823340530365942e-06,
"loss": 1.2211,
"step": 35210
},
{
"epoch": 26.31,
"learning_rate": 6.809500083042685e-06,
"loss": 1.2353,
"step": 35220
},
{
"epoch": 26.32,
"learning_rate": 6.795659635719427e-06,
"loss": 1.2388,
"step": 35230
},
{
"epoch": 26.33,
"learning_rate": 6.78181918839617e-06,
"loss": 1.2465,
"step": 35240
},
{
"epoch": 26.34,
"learning_rate": 6.767978741072911e-06,
"loss": 1.2465,
"step": 35250
},
{
"epoch": 26.34,
"learning_rate": 6.754138293749654e-06,
"loss": 1.2363,
"step": 35260
},
{
"epoch": 26.35,
"learning_rate": 6.740297846426397e-06,
"loss": 1.2569,
"step": 35270
},
{
"epoch": 26.36,
"learning_rate": 6.726457399103139e-06,
"loss": 1.2451,
"step": 35280
},
{
"epoch": 26.37,
"learning_rate": 6.712616951779882e-06,
"loss": 1.235,
"step": 35290
},
{
"epoch": 26.37,
"learning_rate": 6.698776504456625e-06,
"loss": 1.2379,
"step": 35300
},
{
"epoch": 26.38,
"learning_rate": 6.684936057133368e-06,
"loss": 1.2141,
"step": 35310
},
{
"epoch": 26.39,
"learning_rate": 6.671095609810109e-06,
"loss": 1.2684,
"step": 35320
},
{
"epoch": 26.4,
"learning_rate": 6.657255162486851e-06,
"loss": 1.2691,
"step": 35330
},
{
"epoch": 26.4,
"learning_rate": 6.643414715163594e-06,
"loss": 1.2558,
"step": 35340
},
{
"epoch": 26.41,
"learning_rate": 6.629574267840337e-06,
"loss": 1.2382,
"step": 35350
},
{
"epoch": 26.42,
"learning_rate": 6.615733820517079e-06,
"loss": 1.2671,
"step": 35360
},
{
"epoch": 26.43,
"learning_rate": 6.601893373193822e-06,
"loss": 1.2789,
"step": 35370
},
{
"epoch": 26.43,
"learning_rate": 6.588052925870565e-06,
"loss": 1.2333,
"step": 35380
},
{
"epoch": 26.44,
"learning_rate": 6.574212478547306e-06,
"loss": 1.2525,
"step": 35390
},
{
"epoch": 26.45,
"learning_rate": 6.560372031224049e-06,
"loss": 1.2239,
"step": 35400
},
{
"epoch": 26.45,
"learning_rate": 6.546531583900791e-06,
"loss": 1.2158,
"step": 35410
},
{
"epoch": 26.46,
"learning_rate": 6.532691136577534e-06,
"loss": 1.2573,
"step": 35420
},
{
"epoch": 26.47,
"learning_rate": 6.518850689254277e-06,
"loss": 1.2617,
"step": 35430
},
{
"epoch": 26.48,
"learning_rate": 6.50501024193102e-06,
"loss": 1.2179,
"step": 35440
},
{
"epoch": 26.48,
"learning_rate": 6.491169794607762e-06,
"loss": 1.2559,
"step": 35450
},
{
"epoch": 26.49,
"learning_rate": 6.477329347284504e-06,
"loss": 1.2322,
"step": 35460
},
{
"epoch": 26.5,
"learning_rate": 6.4634888999612466e-06,
"loss": 1.2771,
"step": 35470
},
{
"epoch": 26.51,
"learning_rate": 6.4496484526379895e-06,
"loss": 1.2679,
"step": 35480
},
{
"epoch": 26.51,
"learning_rate": 6.4358080053147324e-06,
"loss": 1.2447,
"step": 35490
},
{
"epoch": 26.52,
"learning_rate": 6.4219675579914745e-06,
"loss": 1.1917,
"step": 35500
},
{
"epoch": 26.53,
"learning_rate": 6.4081271106682175e-06,
"loss": 1.2468,
"step": 35510
},
{
"epoch": 26.54,
"learning_rate": 6.3942866633449604e-06,
"loss": 1.2512,
"step": 35520
},
{
"epoch": 26.54,
"learning_rate": 6.380446216021702e-06,
"loss": 1.2378,
"step": 35530
},
{
"epoch": 26.55,
"learning_rate": 6.366605768698445e-06,
"loss": 1.2603,
"step": 35540
},
{
"epoch": 26.56,
"learning_rate": 6.352765321375187e-06,
"loss": 1.2189,
"step": 35550
},
{
"epoch": 26.57,
"learning_rate": 6.33892487405193e-06,
"loss": 1.2331,
"step": 35560
},
{
"epoch": 26.57,
"learning_rate": 6.325084426728673e-06,
"loss": 1.2105,
"step": 35570
},
{
"epoch": 26.58,
"learning_rate": 6.311243979405415e-06,
"loss": 1.2679,
"step": 35580
},
{
"epoch": 26.59,
"learning_rate": 6.297403532082158e-06,
"loss": 1.24,
"step": 35590
},
{
"epoch": 26.6,
"learning_rate": 6.283563084758899e-06,
"loss": 1.243,
"step": 35600
},
{
"epoch": 26.6,
"learning_rate": 6.269722637435642e-06,
"loss": 1.2369,
"step": 35610
},
{
"epoch": 26.61,
"learning_rate": 6.255882190112385e-06,
"loss": 1.2293,
"step": 35620
},
{
"epoch": 26.62,
"learning_rate": 6.242041742789127e-06,
"loss": 1.2338,
"step": 35630
},
{
"epoch": 26.63,
"learning_rate": 6.22820129546587e-06,
"loss": 1.2278,
"step": 35640
},
{
"epoch": 26.63,
"learning_rate": 6.214360848142612e-06,
"loss": 1.259,
"step": 35650
},
{
"epoch": 26.64,
"learning_rate": 6.200520400819355e-06,
"loss": 1.2223,
"step": 35660
},
{
"epoch": 26.65,
"learning_rate": 6.186679953496097e-06,
"loss": 1.235,
"step": 35670
},
{
"epoch": 26.66,
"learning_rate": 6.172839506172839e-06,
"loss": 1.2691,
"step": 35680
},
{
"epoch": 26.66,
"learning_rate": 6.158999058849582e-06,
"loss": 1.2314,
"step": 35690
},
{
"epoch": 26.67,
"learning_rate": 6.145158611526325e-06,
"loss": 1.3086,
"step": 35700
},
{
"epoch": 26.68,
"learning_rate": 6.131318164203068e-06,
"loss": 1.2574,
"step": 35710
},
{
"epoch": 26.69,
"learning_rate": 6.117477716879809e-06,
"loss": 1.2439,
"step": 35720
},
{
"epoch": 26.69,
"learning_rate": 6.103637269556552e-06,
"loss": 1.2209,
"step": 35730
},
{
"epoch": 26.7,
"learning_rate": 6.089796822233295e-06,
"loss": 1.2746,
"step": 35740
},
{
"epoch": 26.71,
"learning_rate": 6.075956374910037e-06,
"loss": 1.249,
"step": 35750
},
{
"epoch": 26.72,
"learning_rate": 6.06211592758678e-06,
"loss": 1.2273,
"step": 35760
},
{
"epoch": 26.72,
"learning_rate": 6.048275480263522e-06,
"loss": 1.2396,
"step": 35770
},
{
"epoch": 26.73,
"learning_rate": 6.034435032940265e-06,
"loss": 1.2022,
"step": 35780
},
{
"epoch": 26.74,
"learning_rate": 6.020594585617007e-06,
"loss": 1.2721,
"step": 35790
},
{
"epoch": 26.75,
"learning_rate": 6.00675413829375e-06,
"loss": 1.2731,
"step": 35800
},
{
"epoch": 26.75,
"learning_rate": 5.992913690970492e-06,
"loss": 1.2867,
"step": 35810
},
{
"epoch": 26.76,
"learning_rate": 5.9790732436472345e-06,
"loss": 1.2099,
"step": 35820
},
{
"epoch": 26.77,
"learning_rate": 5.9652327963239774e-06,
"loss": 1.2163,
"step": 35830
},
{
"epoch": 26.78,
"learning_rate": 5.95139234900072e-06,
"loss": 1.2708,
"step": 35840
},
{
"epoch": 26.78,
"learning_rate": 5.9375519016774625e-06,
"loss": 1.297,
"step": 35850
},
{
"epoch": 26.79,
"learning_rate": 5.923711454354205e-06,
"loss": 1.2255,
"step": 35860
},
{
"epoch": 26.8,
"learning_rate": 5.9098710070309475e-06,
"loss": 1.2235,
"step": 35870
},
{
"epoch": 26.81,
"learning_rate": 5.8960305597076905e-06,
"loss": 1.2765,
"step": 35880
},
{
"epoch": 26.81,
"learning_rate": 5.882190112384433e-06,
"loss": 1.2433,
"step": 35890
},
{
"epoch": 26.82,
"learning_rate": 5.868349665061175e-06,
"loss": 1.243,
"step": 35900
},
{
"epoch": 26.83,
"learning_rate": 5.854509217737918e-06,
"loss": 1.266,
"step": 35910
},
{
"epoch": 26.84,
"learning_rate": 5.840668770414661e-06,
"loss": 1.2697,
"step": 35920
},
{
"epoch": 26.84,
"learning_rate": 5.826828323091403e-06,
"loss": 1.3006,
"step": 35930
},
{
"epoch": 26.85,
"learning_rate": 5.812987875768145e-06,
"loss": 1.2512,
"step": 35940
},
{
"epoch": 26.86,
"learning_rate": 5.799147428444888e-06,
"loss": 1.2404,
"step": 35950
},
{
"epoch": 26.87,
"learning_rate": 5.78530698112163e-06,
"loss": 1.2293,
"step": 35960
},
{
"epoch": 26.87,
"learning_rate": 5.771466533798373e-06,
"loss": 1.2721,
"step": 35970
},
{
"epoch": 26.88,
"learning_rate": 5.757626086475115e-06,
"loss": 1.2135,
"step": 35980
},
{
"epoch": 26.89,
"learning_rate": 5.743785639151858e-06,
"loss": 1.226,
"step": 35990
},
{
"epoch": 26.9,
"learning_rate": 5.7299451918286e-06,
"loss": 1.2612,
"step": 36000
},
{
"epoch": 26.9,
"learning_rate": 5.716104744505343e-06,
"loss": 1.2432,
"step": 36010
},
{
"epoch": 26.91,
"learning_rate": 5.702264297182086e-06,
"loss": 1.237,
"step": 36020
},
{
"epoch": 26.92,
"learning_rate": 5.688423849858827e-06,
"loss": 1.2486,
"step": 36030
},
{
"epoch": 26.93,
"learning_rate": 5.67458340253557e-06,
"loss": 1.1792,
"step": 36040
},
{
"epoch": 26.93,
"learning_rate": 5.660742955212313e-06,
"loss": 1.2404,
"step": 36050
},
{
"epoch": 26.94,
"learning_rate": 5.646902507889056e-06,
"loss": 1.2459,
"step": 36060
},
{
"epoch": 26.95,
"learning_rate": 5.633062060565797e-06,
"loss": 1.2341,
"step": 36070
},
{
"epoch": 26.96,
"learning_rate": 5.61922161324254e-06,
"loss": 1.2697,
"step": 36080
},
{
"epoch": 26.96,
"learning_rate": 5.605381165919283e-06,
"loss": 1.2787,
"step": 36090
},
{
"epoch": 26.97,
"learning_rate": 5.591540718596025e-06,
"loss": 1.2616,
"step": 36100
},
{
"epoch": 26.98,
"learning_rate": 5.577700271272768e-06,
"loss": 1.2652,
"step": 36110
},
{
"epoch": 26.99,
"learning_rate": 5.56385982394951e-06,
"loss": 1.253,
"step": 36120
},
{
"epoch": 26.99,
"learning_rate": 5.550019376626253e-06,
"loss": 1.2156,
"step": 36130
},
{
"epoch": 27.0,
"eval_accuracy": 0.6052313671936551,
"eval_loss": 0.929703414440155,
"eval_runtime": 72.2819,
"eval_samples_per_second": 263.399,
"eval_steps_per_second": 8.232,
"step": 36139
},
{
"epoch": 27.0,
"learning_rate": 5.536178929302995e-06,
"loss": 1.276,
"step": 36140
},
{
"epoch": 27.01,
"learning_rate": 5.522338481979738e-06,
"loss": 1.2354,
"step": 36150
},
{
"epoch": 27.02,
"learning_rate": 5.50849803465648e-06,
"loss": 1.2509,
"step": 36160
},
{
"epoch": 27.02,
"learning_rate": 5.494657587333222e-06,
"loss": 1.2509,
"step": 36170
},
{
"epoch": 27.03,
"learning_rate": 5.480817140009965e-06,
"loss": 1.2197,
"step": 36180
},
{
"epoch": 27.04,
"learning_rate": 5.466976692686708e-06,
"loss": 1.2327,
"step": 36190
},
{
"epoch": 27.05,
"learning_rate": 5.45313624536345e-06,
"loss": 1.2621,
"step": 36200
},
{
"epoch": 27.05,
"learning_rate": 5.4392957980401925e-06,
"loss": 1.2482,
"step": 36210
},
{
"epoch": 27.06,
"learning_rate": 5.4254553507169355e-06,
"loss": 1.2306,
"step": 36220
},
{
"epoch": 27.07,
"learning_rate": 5.411614903393678e-06,
"loss": 1.262,
"step": 36230
},
{
"epoch": 27.08,
"learning_rate": 5.3977744560704205e-06,
"loss": 1.2515,
"step": 36240
},
{
"epoch": 27.08,
"learning_rate": 5.383934008747163e-06,
"loss": 1.2554,
"step": 36250
},
{
"epoch": 27.09,
"learning_rate": 5.3700935614239056e-06,
"loss": 1.252,
"step": 36260
},
{
"epoch": 27.1,
"learning_rate": 5.356253114100648e-06,
"loss": 1.2446,
"step": 36270
},
{
"epoch": 27.1,
"learning_rate": 5.342412666777391e-06,
"loss": 1.2583,
"step": 36280
},
{
"epoch": 27.11,
"learning_rate": 5.328572219454133e-06,
"loss": 1.2563,
"step": 36290
},
{
"epoch": 27.12,
"learning_rate": 5.314731772130876e-06,
"loss": 1.2436,
"step": 36300
},
{
"epoch": 27.13,
"learning_rate": 5.300891324807618e-06,
"loss": 1.2648,
"step": 36310
},
{
"epoch": 27.13,
"learning_rate": 5.287050877484361e-06,
"loss": 1.2405,
"step": 36320
},
{
"epoch": 27.14,
"learning_rate": 5.273210430161103e-06,
"loss": 1.2401,
"step": 36330
},
{
"epoch": 27.15,
"learning_rate": 5.259369982837845e-06,
"loss": 1.2604,
"step": 36340
},
{
"epoch": 27.16,
"learning_rate": 5.245529535514588e-06,
"loss": 1.2621,
"step": 36350
},
{
"epoch": 27.16,
"learning_rate": 5.231689088191331e-06,
"loss": 1.2607,
"step": 36360
},
{
"epoch": 27.17,
"learning_rate": 5.217848640868074e-06,
"loss": 1.2278,
"step": 36370
},
{
"epoch": 27.18,
"learning_rate": 5.204008193544815e-06,
"loss": 1.2571,
"step": 36380
},
{
"epoch": 27.19,
"learning_rate": 5.190167746221558e-06,
"loss": 1.2199,
"step": 36390
},
{
"epoch": 27.19,
"learning_rate": 5.176327298898301e-06,
"loss": 1.2359,
"step": 36400
},
{
"epoch": 27.2,
"learning_rate": 5.162486851575043e-06,
"loss": 1.2512,
"step": 36410
},
{
"epoch": 27.21,
"learning_rate": 5.148646404251786e-06,
"loss": 1.2459,
"step": 36420
},
{
"epoch": 27.22,
"learning_rate": 5.134805956928528e-06,
"loss": 1.2608,
"step": 36430
},
{
"epoch": 27.22,
"learning_rate": 5.120965509605271e-06,
"loss": 1.2213,
"step": 36440
},
{
"epoch": 27.23,
"learning_rate": 5.107125062282013e-06,
"loss": 1.2441,
"step": 36450
},
{
"epoch": 27.24,
"learning_rate": 5.093284614958756e-06,
"loss": 1.2578,
"step": 36460
},
{
"epoch": 27.25,
"learning_rate": 5.079444167635498e-06,
"loss": 1.2305,
"step": 36470
},
{
"epoch": 27.25,
"learning_rate": 5.06560372031224e-06,
"loss": 1.2341,
"step": 36480
},
{
"epoch": 27.26,
"learning_rate": 5.051763272988983e-06,
"loss": 1.2307,
"step": 36490
},
{
"epoch": 27.27,
"learning_rate": 5.037922825665726e-06,
"loss": 1.2246,
"step": 36500
},
{
"epoch": 27.28,
"learning_rate": 5.024082378342468e-06,
"loss": 1.2634,
"step": 36510
},
{
"epoch": 27.28,
"learning_rate": 5.01024193101921e-06,
"loss": 1.2228,
"step": 36520
},
{
"epoch": 27.29,
"learning_rate": 4.996401483695953e-06,
"loss": 1.2904,
"step": 36530
},
{
"epoch": 27.3,
"learning_rate": 4.982561036372696e-06,
"loss": 1.2588,
"step": 36540
},
{
"epoch": 27.31,
"learning_rate": 4.968720589049438e-06,
"loss": 1.2824,
"step": 36550
},
{
"epoch": 27.31,
"learning_rate": 4.9548801417261805e-06,
"loss": 1.265,
"step": 36560
},
{
"epoch": 27.32,
"learning_rate": 4.941039694402923e-06,
"loss": 1.2445,
"step": 36570
},
{
"epoch": 27.33,
"learning_rate": 4.927199247079666e-06,
"loss": 1.2281,
"step": 36580
},
{
"epoch": 27.34,
"learning_rate": 4.9133587997564085e-06,
"loss": 1.256,
"step": 36590
},
{
"epoch": 27.34,
"learning_rate": 4.8995183524331506e-06,
"loss": 1.2435,
"step": 36600
},
{
"epoch": 27.35,
"learning_rate": 4.8856779051098935e-06,
"loss": 1.2018,
"step": 36610
},
{
"epoch": 27.36,
"learning_rate": 4.871837457786636e-06,
"loss": 1.228,
"step": 36620
},
{
"epoch": 27.37,
"learning_rate": 4.8579970104633785e-06,
"loss": 1.2688,
"step": 36630
},
{
"epoch": 27.37,
"learning_rate": 4.844156563140121e-06,
"loss": 1.2114,
"step": 36640
},
{
"epoch": 27.38,
"learning_rate": 4.830316115816864e-06,
"loss": 1.221,
"step": 36650
},
{
"epoch": 27.39,
"learning_rate": 4.816475668493606e-06,
"loss": 1.2732,
"step": 36660
},
{
"epoch": 27.4,
"learning_rate": 4.802635221170349e-06,
"loss": 1.2172,
"step": 36670
},
{
"epoch": 27.4,
"learning_rate": 4.788794773847092e-06,
"loss": 1.2248,
"step": 36680
},
{
"epoch": 27.41,
"learning_rate": 4.774954326523833e-06,
"loss": 1.269,
"step": 36690
},
{
"epoch": 27.42,
"learning_rate": 4.761113879200576e-06,
"loss": 1.2365,
"step": 36700
},
{
"epoch": 27.43,
"learning_rate": 4.747273431877319e-06,
"loss": 1.2274,
"step": 36710
},
{
"epoch": 27.43,
"learning_rate": 4.733432984554062e-06,
"loss": 1.2539,
"step": 36720
},
{
"epoch": 27.44,
"learning_rate": 4.719592537230803e-06,
"loss": 1.2198,
"step": 36730
},
{
"epoch": 27.45,
"learning_rate": 4.705752089907546e-06,
"loss": 1.2273,
"step": 36740
},
{
"epoch": 27.46,
"learning_rate": 4.691911642584289e-06,
"loss": 1.2143,
"step": 36750
},
{
"epoch": 27.46,
"learning_rate": 4.678071195261031e-06,
"loss": 1.2428,
"step": 36760
},
{
"epoch": 27.47,
"learning_rate": 4.664230747937774e-06,
"loss": 1.259,
"step": 36770
},
{
"epoch": 27.48,
"learning_rate": 4.650390300614516e-06,
"loss": 1.2266,
"step": 36780
},
{
"epoch": 27.49,
"learning_rate": 4.636549853291259e-06,
"loss": 1.2657,
"step": 36790
},
{
"epoch": 27.49,
"learning_rate": 4.622709405968001e-06,
"loss": 1.2608,
"step": 36800
},
{
"epoch": 27.5,
"learning_rate": 4.608868958644744e-06,
"loss": 1.2351,
"step": 36810
},
{
"epoch": 27.51,
"learning_rate": 4.595028511321486e-06,
"loss": 1.2542,
"step": 36820
},
{
"epoch": 27.52,
"learning_rate": 4.581188063998228e-06,
"loss": 1.2194,
"step": 36830
},
{
"epoch": 27.52,
"learning_rate": 4.567347616674971e-06,
"loss": 1.2335,
"step": 36840
},
{
"epoch": 27.53,
"learning_rate": 4.553507169351714e-06,
"loss": 1.2545,
"step": 36850
},
{
"epoch": 27.54,
"learning_rate": 4.539666722028456e-06,
"loss": 1.2323,
"step": 36860
},
{
"epoch": 27.55,
"learning_rate": 4.525826274705198e-06,
"loss": 1.2449,
"step": 36870
},
{
"epoch": 27.55,
"learning_rate": 4.511985827381941e-06,
"loss": 1.2881,
"step": 36880
},
{
"epoch": 27.56,
"learning_rate": 4.498145380058684e-06,
"loss": 1.2421,
"step": 36890
},
{
"epoch": 27.57,
"learning_rate": 4.484304932735426e-06,
"loss": 1.2591,
"step": 36900
},
{
"epoch": 27.58,
"learning_rate": 4.470464485412168e-06,
"loss": 1.2558,
"step": 36910
},
{
"epoch": 27.58,
"learning_rate": 4.456624038088911e-06,
"loss": 1.2237,
"step": 36920
},
{
"epoch": 27.59,
"learning_rate": 4.4427835907656534e-06,
"loss": 1.2245,
"step": 36930
},
{
"epoch": 27.6,
"learning_rate": 4.428943143442396e-06,
"loss": 1.1873,
"step": 36940
},
{
"epoch": 27.61,
"learning_rate": 4.4151026961191385e-06,
"loss": 1.2363,
"step": 36950
},
{
"epoch": 27.61,
"learning_rate": 4.4012622487958814e-06,
"loss": 1.2607,
"step": 36960
},
{
"epoch": 27.62,
"learning_rate": 4.3874218014726235e-06,
"loss": 1.257,
"step": 36970
},
{
"epoch": 27.63,
"learning_rate": 4.3735813541493665e-06,
"loss": 1.247,
"step": 36980
},
{
"epoch": 27.64,
"learning_rate": 4.359740906826109e-06,
"loss": 1.2013,
"step": 36990
},
{
"epoch": 27.64,
"learning_rate": 4.345900459502851e-06,
"loss": 1.2236,
"step": 37000
},
{
"epoch": 27.65,
"learning_rate": 4.332060012179594e-06,
"loss": 1.2463,
"step": 37010
},
{
"epoch": 27.66,
"learning_rate": 4.318219564856337e-06,
"loss": 1.2709,
"step": 37020
},
{
"epoch": 27.67,
"learning_rate": 4.3043791175330795e-06,
"loss": 1.2383,
"step": 37030
},
{
"epoch": 27.67,
"learning_rate": 4.290538670209821e-06,
"loss": 1.2196,
"step": 37040
},
{
"epoch": 27.68,
"learning_rate": 4.276698222886564e-06,
"loss": 1.2193,
"step": 37050
},
{
"epoch": 27.69,
"learning_rate": 4.262857775563307e-06,
"loss": 1.1887,
"step": 37060
},
{
"epoch": 27.7,
"learning_rate": 4.249017328240049e-06,
"loss": 1.2343,
"step": 37070
},
{
"epoch": 27.7,
"learning_rate": 4.235176880916792e-06,
"loss": 1.2173,
"step": 37080
},
{
"epoch": 27.71,
"learning_rate": 4.221336433593534e-06,
"loss": 1.212,
"step": 37090
},
{
"epoch": 27.72,
"learning_rate": 4.207495986270277e-06,
"loss": 1.2031,
"step": 37100
},
{
"epoch": 27.73,
"learning_rate": 4.193655538947019e-06,
"loss": 1.2592,
"step": 37110
},
{
"epoch": 27.73,
"learning_rate": 4.179815091623762e-06,
"loss": 1.2174,
"step": 37120
},
{
"epoch": 27.74,
"learning_rate": 4.165974644300504e-06,
"loss": 1.2516,
"step": 37130
},
{
"epoch": 27.75,
"learning_rate": 4.152134196977246e-06,
"loss": 1.2126,
"step": 37140
},
{
"epoch": 27.75,
"learning_rate": 4.138293749653989e-06,
"loss": 1.2819,
"step": 37150
},
{
"epoch": 27.76,
"learning_rate": 4.124453302330732e-06,
"loss": 1.2452,
"step": 37160
},
{
"epoch": 27.77,
"learning_rate": 4.110612855007474e-06,
"loss": 1.2419,
"step": 37170
},
{
"epoch": 27.78,
"learning_rate": 4.096772407684216e-06,
"loss": 1.2513,
"step": 37180
},
{
"epoch": 27.78,
"learning_rate": 4.082931960360959e-06,
"loss": 1.2628,
"step": 37190
},
{
"epoch": 27.79,
"learning_rate": 4.069091513037702e-06,
"loss": 1.2863,
"step": 37200
},
{
"epoch": 27.8,
"learning_rate": 4.055251065714444e-06,
"loss": 1.2385,
"step": 37210
},
{
"epoch": 27.81,
"learning_rate": 4.041410618391186e-06,
"loss": 1.2578,
"step": 37220
},
{
"epoch": 27.81,
"learning_rate": 4.027570171067929e-06,
"loss": 1.2298,
"step": 37230
},
{
"epoch": 27.82,
"learning_rate": 4.013729723744672e-06,
"loss": 1.2373,
"step": 37240
},
{
"epoch": 27.83,
"learning_rate": 3.999889276421414e-06,
"loss": 1.2314,
"step": 37250
},
{
"epoch": 27.84,
"learning_rate": 3.986048829098156e-06,
"loss": 1.2335,
"step": 37260
},
{
"epoch": 27.84,
"learning_rate": 3.972208381774899e-06,
"loss": 1.2678,
"step": 37270
},
{
"epoch": 27.85,
"learning_rate": 3.958367934451641e-06,
"loss": 1.2751,
"step": 37280
},
{
"epoch": 27.86,
"learning_rate": 3.944527487128384e-06,
"loss": 1.2821,
"step": 37290
},
{
"epoch": 27.87,
"learning_rate": 3.930687039805126e-06,
"loss": 1.2279,
"step": 37300
},
{
"epoch": 27.87,
"learning_rate": 3.916846592481869e-06,
"loss": 1.22,
"step": 37310
},
{
"epoch": 27.88,
"learning_rate": 3.9030061451586115e-06,
"loss": 1.2412,
"step": 37320
},
{
"epoch": 27.89,
"learning_rate": 3.889165697835354e-06,
"loss": 1.275,
"step": 37330
},
{
"epoch": 27.9,
"learning_rate": 3.875325250512097e-06,
"loss": 1.2582,
"step": 37340
},
{
"epoch": 27.9,
"learning_rate": 3.861484803188839e-06,
"loss": 1.2128,
"step": 37350
},
{
"epoch": 27.91,
"learning_rate": 3.8476443558655816e-06,
"loss": 1.2292,
"step": 37360
},
{
"epoch": 27.92,
"learning_rate": 3.8338039085423245e-06,
"loss": 1.2622,
"step": 37370
},
{
"epoch": 27.93,
"learning_rate": 3.8199634612190675e-06,
"loss": 1.2569,
"step": 37380
},
{
"epoch": 27.93,
"learning_rate": 3.806123013895809e-06,
"loss": 1.216,
"step": 37390
},
{
"epoch": 27.94,
"learning_rate": 3.7922825665725517e-06,
"loss": 1.2232,
"step": 37400
},
{
"epoch": 27.95,
"learning_rate": 3.7784421192492946e-06,
"loss": 1.2687,
"step": 37410
},
{
"epoch": 27.96,
"learning_rate": 3.7646016719260367e-06,
"loss": 1.2191,
"step": 37420
},
{
"epoch": 27.96,
"learning_rate": 3.7507612246027792e-06,
"loss": 1.2395,
"step": 37430
},
{
"epoch": 27.97,
"learning_rate": 3.736920777279522e-06,
"loss": 1.2341,
"step": 37440
},
{
"epoch": 27.98,
"learning_rate": 3.723080329956264e-06,
"loss": 1.2167,
"step": 37450
},
{
"epoch": 27.99,
"learning_rate": 3.709239882633007e-06,
"loss": 1.2519,
"step": 37460
},
{
"epoch": 27.99,
"learning_rate": 3.6953994353097493e-06,
"loss": 1.25,
"step": 37470
},
{
"epoch": 28.0,
"eval_accuracy": 0.6062293187667419,
"eval_loss": 0.929965078830719,
"eval_runtime": 72.3133,
"eval_samples_per_second": 263.285,
"eval_steps_per_second": 8.228,
"step": 37478
},
{
"epoch": 28.0,
"learning_rate": 3.6815589879864923e-06,
"loss": 1.2726,
"step": 37480
},
{
"epoch": 28.01,
"learning_rate": 3.667718540663234e-06,
"loss": 1.2259,
"step": 37490
},
{
"epoch": 28.02,
"learning_rate": 3.653878093339977e-06,
"loss": 1.2426,
"step": 37500
},
{
"epoch": 28.02,
"learning_rate": 3.6400376460167194e-06,
"loss": 1.2488,
"step": 37510
},
{
"epoch": 28.03,
"learning_rate": 3.6261971986934615e-06,
"loss": 1.2371,
"step": 37520
},
{
"epoch": 28.04,
"learning_rate": 3.6123567513702045e-06,
"loss": 1.2081,
"step": 37530
},
{
"epoch": 28.05,
"learning_rate": 3.598516304046947e-06,
"loss": 1.2414,
"step": 37540
},
{
"epoch": 28.05,
"learning_rate": 3.5846758567236895e-06,
"loss": 1.2304,
"step": 37550
},
{
"epoch": 28.06,
"learning_rate": 3.5708354094004316e-06,
"loss": 1.2375,
"step": 37560
},
{
"epoch": 28.07,
"learning_rate": 3.5569949620771746e-06,
"loss": 1.2683,
"step": 37570
},
{
"epoch": 28.08,
"learning_rate": 3.543154514753917e-06,
"loss": 1.256,
"step": 37580
},
{
"epoch": 28.08,
"learning_rate": 3.529314067430659e-06,
"loss": 1.2549,
"step": 37590
},
{
"epoch": 28.09,
"learning_rate": 3.5154736201074017e-06,
"loss": 1.2274,
"step": 37600
},
{
"epoch": 28.1,
"learning_rate": 3.5016331727841447e-06,
"loss": 1.2066,
"step": 37610
},
{
"epoch": 28.11,
"learning_rate": 3.487792725460887e-06,
"loss": 1.2253,
"step": 37620
},
{
"epoch": 28.11,
"learning_rate": 3.4739522781376293e-06,
"loss": 1.1962,
"step": 37630
},
{
"epoch": 28.12,
"learning_rate": 3.4601118308143723e-06,
"loss": 1.2548,
"step": 37640
},
{
"epoch": 28.13,
"learning_rate": 3.4462713834911148e-06,
"loss": 1.2198,
"step": 37650
},
{
"epoch": 28.14,
"learning_rate": 3.432430936167857e-06,
"loss": 1.2474,
"step": 37660
},
{
"epoch": 28.14,
"learning_rate": 3.4185904888445994e-06,
"loss": 1.2783,
"step": 37670
},
{
"epoch": 28.15,
"learning_rate": 3.4047500415213424e-06,
"loss": 1.2544,
"step": 37680
},
{
"epoch": 28.16,
"learning_rate": 3.390909594198085e-06,
"loss": 1.2552,
"step": 37690
},
{
"epoch": 28.17,
"learning_rate": 3.377069146874827e-06,
"loss": 1.2109,
"step": 37700
},
{
"epoch": 28.17,
"learning_rate": 3.3632286995515695e-06,
"loss": 1.2485,
"step": 37710
},
{
"epoch": 28.18,
"learning_rate": 3.3493882522283125e-06,
"loss": 1.2431,
"step": 37720
},
{
"epoch": 28.19,
"learning_rate": 3.3355478049050546e-06,
"loss": 1.2315,
"step": 37730
},
{
"epoch": 28.2,
"learning_rate": 3.321707357581797e-06,
"loss": 1.2938,
"step": 37740
},
{
"epoch": 28.2,
"learning_rate": 3.3078669102585396e-06,
"loss": 1.2239,
"step": 37750
},
{
"epoch": 28.21,
"learning_rate": 3.2940264629352826e-06,
"loss": 1.2472,
"step": 37760
},
{
"epoch": 28.22,
"learning_rate": 3.2801860156120247e-06,
"loss": 1.1985,
"step": 37770
},
{
"epoch": 28.23,
"learning_rate": 3.266345568288767e-06,
"loss": 1.2605,
"step": 37780
},
{
"epoch": 28.23,
"learning_rate": 3.25250512096551e-06,
"loss": 1.2398,
"step": 37790
},
{
"epoch": 28.24,
"learning_rate": 3.238664673642252e-06,
"loss": 1.2242,
"step": 37800
},
{
"epoch": 28.25,
"learning_rate": 3.2248242263189947e-06,
"loss": 1.2384,
"step": 37810
},
{
"epoch": 28.26,
"learning_rate": 3.2109837789957373e-06,
"loss": 1.1893,
"step": 37820
},
{
"epoch": 28.26,
"learning_rate": 3.1971433316724802e-06,
"loss": 1.1965,
"step": 37830
},
{
"epoch": 28.27,
"learning_rate": 3.1833028843492223e-06,
"loss": 1.2695,
"step": 37840
},
{
"epoch": 28.28,
"learning_rate": 3.169462437025965e-06,
"loss": 1.2492,
"step": 37850
},
{
"epoch": 28.29,
"learning_rate": 3.1556219897027074e-06,
"loss": 1.2464,
"step": 37860
},
{
"epoch": 28.29,
"learning_rate": 3.1417815423794495e-06,
"loss": 1.2604,
"step": 37870
},
{
"epoch": 28.3,
"learning_rate": 3.1279410950561924e-06,
"loss": 1.2165,
"step": 37880
},
{
"epoch": 28.31,
"learning_rate": 3.114100647732935e-06,
"loss": 1.2388,
"step": 37890
},
{
"epoch": 28.32,
"learning_rate": 3.1002602004096775e-06,
"loss": 1.2236,
"step": 37900
},
{
"epoch": 28.32,
"learning_rate": 3.0864197530864196e-06,
"loss": 1.2169,
"step": 37910
},
{
"epoch": 28.33,
"learning_rate": 3.0725793057631625e-06,
"loss": 1.2423,
"step": 37920
},
{
"epoch": 28.34,
"learning_rate": 3.0587388584399046e-06,
"loss": 1.2522,
"step": 37930
},
{
"epoch": 28.35,
"learning_rate": 3.0448984111166476e-06,
"loss": 1.215,
"step": 37940
},
{
"epoch": 28.35,
"learning_rate": 3.03105796379339e-06,
"loss": 1.2873,
"step": 37950
},
{
"epoch": 28.36,
"learning_rate": 3.0172175164701326e-06,
"loss": 1.2793,
"step": 37960
},
{
"epoch": 28.37,
"learning_rate": 3.003377069146875e-06,
"loss": 1.2454,
"step": 37970
},
{
"epoch": 28.38,
"learning_rate": 2.9895366218236172e-06,
"loss": 1.2167,
"step": 37980
},
{
"epoch": 28.38,
"learning_rate": 2.97569617450036e-06,
"loss": 1.2509,
"step": 37990
},
{
"epoch": 28.39,
"learning_rate": 2.9618557271771023e-06,
"loss": 1.2357,
"step": 38000
},
{
"epoch": 28.4,
"learning_rate": 2.9480152798538452e-06,
"loss": 1.2323,
"step": 38010
},
{
"epoch": 28.4,
"learning_rate": 2.9341748325305873e-06,
"loss": 1.2745,
"step": 38020
},
{
"epoch": 28.41,
"learning_rate": 2.9203343852073303e-06,
"loss": 1.2136,
"step": 38030
},
{
"epoch": 28.42,
"learning_rate": 2.9064939378840724e-06,
"loss": 1.2217,
"step": 38040
},
{
"epoch": 28.43,
"learning_rate": 2.892653490560815e-06,
"loss": 1.1896,
"step": 38050
},
{
"epoch": 28.43,
"learning_rate": 2.8788130432375574e-06,
"loss": 1.2318,
"step": 38060
},
{
"epoch": 28.44,
"learning_rate": 2.8649725959143e-06,
"loss": 1.213,
"step": 38070
},
{
"epoch": 28.45,
"learning_rate": 2.851132148591043e-06,
"loss": 1.2356,
"step": 38080
},
{
"epoch": 28.46,
"learning_rate": 2.837291701267785e-06,
"loss": 1.2233,
"step": 38090
},
{
"epoch": 28.46,
"learning_rate": 2.823451253944528e-06,
"loss": 1.2285,
"step": 38100
},
{
"epoch": 28.47,
"learning_rate": 2.80961080662127e-06,
"loss": 1.228,
"step": 38110
},
{
"epoch": 28.48,
"learning_rate": 2.7957703592980126e-06,
"loss": 1.2503,
"step": 38120
},
{
"epoch": 28.49,
"learning_rate": 2.781929911974755e-06,
"loss": 1.2176,
"step": 38130
},
{
"epoch": 28.49,
"learning_rate": 2.7680894646514976e-06,
"loss": 1.2179,
"step": 38140
},
{
"epoch": 28.5,
"learning_rate": 2.75424901732824e-06,
"loss": 1.2224,
"step": 38150
},
{
"epoch": 28.51,
"learning_rate": 2.7404085700049827e-06,
"loss": 1.242,
"step": 38160
},
{
"epoch": 28.52,
"learning_rate": 2.726568122681725e-06,
"loss": 1.2626,
"step": 38170
},
{
"epoch": 28.52,
"learning_rate": 2.7127276753584677e-06,
"loss": 1.229,
"step": 38180
},
{
"epoch": 28.53,
"learning_rate": 2.6988872280352103e-06,
"loss": 1.2598,
"step": 38190
},
{
"epoch": 28.54,
"learning_rate": 2.6850467807119528e-06,
"loss": 1.258,
"step": 38200
},
{
"epoch": 28.55,
"learning_rate": 2.6712063333886953e-06,
"loss": 1.2389,
"step": 38210
},
{
"epoch": 28.55,
"learning_rate": 2.657365886065438e-06,
"loss": 1.2926,
"step": 38220
},
{
"epoch": 28.56,
"learning_rate": 2.6435254387421804e-06,
"loss": 1.223,
"step": 38230
},
{
"epoch": 28.57,
"learning_rate": 2.6296849914189225e-06,
"loss": 1.2342,
"step": 38240
},
{
"epoch": 28.58,
"learning_rate": 2.6158445440956654e-06,
"loss": 1.229,
"step": 38250
},
{
"epoch": 28.58,
"learning_rate": 2.6020040967724075e-06,
"loss": 1.2812,
"step": 38260
},
{
"epoch": 28.59,
"learning_rate": 2.5881636494491505e-06,
"loss": 1.309,
"step": 38270
},
{
"epoch": 28.6,
"learning_rate": 2.574323202125893e-06,
"loss": 1.2579,
"step": 38280
},
{
"epoch": 28.61,
"learning_rate": 2.5604827548026355e-06,
"loss": 1.2266,
"step": 38290
},
{
"epoch": 28.61,
"learning_rate": 2.546642307479378e-06,
"loss": 1.2505,
"step": 38300
},
{
"epoch": 28.62,
"learning_rate": 2.53280186015612e-06,
"loss": 1.2296,
"step": 38310
},
{
"epoch": 28.63,
"learning_rate": 2.518961412832863e-06,
"loss": 1.2593,
"step": 38320
},
{
"epoch": 28.64,
"learning_rate": 2.505120965509605e-06,
"loss": 1.1676,
"step": 38330
},
{
"epoch": 28.64,
"learning_rate": 2.491280518186348e-06,
"loss": 1.2604,
"step": 38340
},
{
"epoch": 28.65,
"learning_rate": 2.4774400708630902e-06,
"loss": 1.2826,
"step": 38350
},
{
"epoch": 28.66,
"learning_rate": 2.463599623539833e-06,
"loss": 1.2232,
"step": 38360
},
{
"epoch": 28.67,
"learning_rate": 2.4497591762165753e-06,
"loss": 1.2278,
"step": 38370
},
{
"epoch": 28.67,
"learning_rate": 2.435918728893318e-06,
"loss": 1.2568,
"step": 38380
},
{
"epoch": 28.68,
"learning_rate": 2.4220782815700603e-06,
"loss": 1.2323,
"step": 38390
},
{
"epoch": 28.69,
"learning_rate": 2.408237834246803e-06,
"loss": 1.1977,
"step": 38400
},
{
"epoch": 28.7,
"learning_rate": 2.394397386923546e-06,
"loss": 1.2139,
"step": 38410
},
{
"epoch": 28.7,
"learning_rate": 2.380556939600288e-06,
"loss": 1.2274,
"step": 38420
},
{
"epoch": 28.71,
"learning_rate": 2.366716492277031e-06,
"loss": 1.2373,
"step": 38430
},
{
"epoch": 28.72,
"learning_rate": 2.352876044953773e-06,
"loss": 1.2279,
"step": 38440
},
{
"epoch": 28.73,
"learning_rate": 2.3390355976305155e-06,
"loss": 1.238,
"step": 38450
},
{
"epoch": 28.73,
"learning_rate": 2.325195150307258e-06,
"loss": 1.198,
"step": 38460
},
{
"epoch": 28.74,
"learning_rate": 2.3113547029840005e-06,
"loss": 1.2051,
"step": 38470
},
{
"epoch": 28.75,
"learning_rate": 2.297514255660743e-06,
"loss": 1.2437,
"step": 38480
},
{
"epoch": 28.76,
"learning_rate": 2.2836738083374856e-06,
"loss": 1.2166,
"step": 38490
},
{
"epoch": 28.76,
"learning_rate": 2.269833361014228e-06,
"loss": 1.2579,
"step": 38500
},
{
"epoch": 28.77,
"learning_rate": 2.2559929136909706e-06,
"loss": 1.2072,
"step": 38510
},
{
"epoch": 28.78,
"learning_rate": 2.242152466367713e-06,
"loss": 1.2211,
"step": 38520
},
{
"epoch": 28.79,
"learning_rate": 2.2283120190444557e-06,
"loss": 1.2664,
"step": 38530
},
{
"epoch": 28.79,
"learning_rate": 2.214471571721198e-06,
"loss": 1.2459,
"step": 38540
},
{
"epoch": 28.8,
"learning_rate": 2.2006311243979407e-06,
"loss": 1.2375,
"step": 38550
},
{
"epoch": 28.81,
"learning_rate": 2.1867906770746832e-06,
"loss": 1.2362,
"step": 38560
},
{
"epoch": 28.82,
"learning_rate": 2.1729502297514253e-06,
"loss": 1.2429,
"step": 38570
},
{
"epoch": 28.82,
"learning_rate": 2.1591097824281683e-06,
"loss": 1.2417,
"step": 38580
},
{
"epoch": 28.83,
"learning_rate": 2.1452693351049104e-06,
"loss": 1.2558,
"step": 38590
},
{
"epoch": 28.84,
"learning_rate": 2.1314288877816533e-06,
"loss": 1.2473,
"step": 38600
},
{
"epoch": 28.85,
"learning_rate": 2.117588440458396e-06,
"loss": 1.2283,
"step": 38610
},
{
"epoch": 28.85,
"learning_rate": 2.1037479931351384e-06,
"loss": 1.2685,
"step": 38620
},
{
"epoch": 28.86,
"learning_rate": 2.089907545811881e-06,
"loss": 1.2504,
"step": 38630
},
{
"epoch": 28.87,
"learning_rate": 2.076067098488623e-06,
"loss": 1.2691,
"step": 38640
},
{
"epoch": 28.88,
"learning_rate": 2.062226651165366e-06,
"loss": 1.2552,
"step": 38650
},
{
"epoch": 28.88,
"learning_rate": 2.048386203842108e-06,
"loss": 1.2492,
"step": 38660
},
{
"epoch": 28.89,
"learning_rate": 2.034545756518851e-06,
"loss": 1.2609,
"step": 38670
},
{
"epoch": 28.9,
"learning_rate": 2.020705309195593e-06,
"loss": 1.2309,
"step": 38680
},
{
"epoch": 28.91,
"learning_rate": 2.006864861872336e-06,
"loss": 1.2205,
"step": 38690
},
{
"epoch": 28.91,
"learning_rate": 1.993024414549078e-06,
"loss": 1.2213,
"step": 38700
},
{
"epoch": 28.92,
"learning_rate": 1.9791839672258207e-06,
"loss": 1.2718,
"step": 38710
},
{
"epoch": 28.93,
"learning_rate": 1.965343519902563e-06,
"loss": 1.2293,
"step": 38720
},
{
"epoch": 28.94,
"learning_rate": 1.9515030725793057e-06,
"loss": 1.2075,
"step": 38730
},
{
"epoch": 28.94,
"learning_rate": 1.9376626252560487e-06,
"loss": 1.2555,
"step": 38740
},
{
"epoch": 28.95,
"learning_rate": 1.9238221779327908e-06,
"loss": 1.1901,
"step": 38750
},
{
"epoch": 28.96,
"learning_rate": 1.9099817306095337e-06,
"loss": 1.2687,
"step": 38760
},
{
"epoch": 28.97,
"learning_rate": 1.8961412832862758e-06,
"loss": 1.2467,
"step": 38770
},
{
"epoch": 28.97,
"learning_rate": 1.8823008359630184e-06,
"loss": 1.2268,
"step": 38780
},
{
"epoch": 28.98,
"learning_rate": 1.868460388639761e-06,
"loss": 1.2551,
"step": 38790
},
{
"epoch": 28.99,
"learning_rate": 1.8546199413165034e-06,
"loss": 1.267,
"step": 38800
},
{
"epoch": 29.0,
"learning_rate": 1.8407794939932461e-06,
"loss": 1.2394,
"step": 38810
},
{
"epoch": 29.0,
"eval_accuracy": 0.6071222228058196,
"eval_loss": 0.9237757921218872,
"eval_runtime": 70.9838,
"eval_samples_per_second": 268.216,
"eval_steps_per_second": 8.382,
"step": 38816
},
{
"epoch": 29.0,
"learning_rate": 1.8269390466699885e-06,
"loss": 1.2248,
"step": 38820
},
{
"epoch": 29.01,
"learning_rate": 1.8130985993467308e-06,
"loss": 1.2329,
"step": 38830
},
{
"epoch": 29.02,
"learning_rate": 1.7992581520234735e-06,
"loss": 1.2485,
"step": 38840
},
{
"epoch": 29.03,
"learning_rate": 1.7854177047002158e-06,
"loss": 1.2491,
"step": 38850
},
{
"epoch": 29.03,
"learning_rate": 1.7715772573769586e-06,
"loss": 1.2439,
"step": 38860
},
{
"epoch": 29.04,
"learning_rate": 1.7577368100537009e-06,
"loss": 1.2607,
"step": 38870
},
{
"epoch": 29.05,
"learning_rate": 1.7438963627304436e-06,
"loss": 1.2243,
"step": 38880
},
{
"epoch": 29.05,
"learning_rate": 1.7300559154071861e-06,
"loss": 1.244,
"step": 38890
},
{
"epoch": 29.06,
"learning_rate": 1.7162154680839284e-06,
"loss": 1.2481,
"step": 38900
},
{
"epoch": 29.07,
"learning_rate": 1.7023750207606712e-06,
"loss": 1.2353,
"step": 38910
},
{
"epoch": 29.08,
"learning_rate": 1.6885345734374135e-06,
"loss": 1.245,
"step": 38920
},
{
"epoch": 29.08,
"learning_rate": 1.6746941261141562e-06,
"loss": 1.2304,
"step": 38930
},
{
"epoch": 29.09,
"learning_rate": 1.6608536787908985e-06,
"loss": 1.2325,
"step": 38940
},
{
"epoch": 29.1,
"learning_rate": 1.6470132314676413e-06,
"loss": 1.2148,
"step": 38950
},
{
"epoch": 29.11,
"learning_rate": 1.6331727841443836e-06,
"loss": 1.2628,
"step": 38960
},
{
"epoch": 29.11,
"learning_rate": 1.619332336821126e-06,
"loss": 1.2216,
"step": 38970
},
{
"epoch": 29.12,
"learning_rate": 1.6054918894978686e-06,
"loss": 1.2009,
"step": 38980
},
{
"epoch": 29.13,
"learning_rate": 1.5916514421746112e-06,
"loss": 1.2308,
"step": 38990
},
{
"epoch": 29.14,
"learning_rate": 1.5778109948513537e-06,
"loss": 1.2167,
"step": 39000
},
{
"epoch": 29.14,
"learning_rate": 1.5639705475280962e-06,
"loss": 1.2514,
"step": 39010
},
{
"epoch": 29.15,
"learning_rate": 1.5501301002048387e-06,
"loss": 1.2253,
"step": 39020
},
{
"epoch": 29.16,
"learning_rate": 1.5362896528815813e-06,
"loss": 1.244,
"step": 39030
},
{
"epoch": 29.17,
"learning_rate": 1.5224492055583238e-06,
"loss": 1.2668,
"step": 39040
},
{
"epoch": 29.17,
"learning_rate": 1.5086087582350663e-06,
"loss": 1.1895,
"step": 39050
},
{
"epoch": 29.18,
"learning_rate": 1.4947683109118086e-06,
"loss": 1.2322,
"step": 39060
},
{
"epoch": 29.19,
"learning_rate": 1.4809278635885511e-06,
"loss": 1.2264,
"step": 39070
},
{
"epoch": 29.2,
"learning_rate": 1.4670874162652937e-06,
"loss": 1.2535,
"step": 39080
},
{
"epoch": 29.2,
"learning_rate": 1.4532469689420362e-06,
"loss": 1.2625,
"step": 39090
},
{
"epoch": 29.21,
"learning_rate": 1.4394065216187787e-06,
"loss": 1.2548,
"step": 39100
},
{
"epoch": 29.22,
"learning_rate": 1.4255660742955215e-06,
"loss": 1.237,
"step": 39110
},
{
"epoch": 29.23,
"learning_rate": 1.411725626972264e-06,
"loss": 1.2302,
"step": 39120
},
{
"epoch": 29.23,
"learning_rate": 1.3978851796490063e-06,
"loss": 1.2121,
"step": 39130
},
{
"epoch": 29.24,
"learning_rate": 1.3840447323257488e-06,
"loss": 1.2689,
"step": 39140
},
{
"epoch": 29.25,
"learning_rate": 1.3702042850024913e-06,
"loss": 1.2281,
"step": 39150
},
{
"epoch": 29.26,
"learning_rate": 1.3563638376792339e-06,
"loss": 1.2071,
"step": 39160
},
{
"epoch": 29.26,
"learning_rate": 1.3425233903559764e-06,
"loss": 1.2359,
"step": 39170
},
{
"epoch": 29.27,
"learning_rate": 1.328682943032719e-06,
"loss": 1.2291,
"step": 39180
},
{
"epoch": 29.28,
"learning_rate": 1.3148424957094612e-06,
"loss": 1.2225,
"step": 39190
},
{
"epoch": 29.29,
"learning_rate": 1.3010020483862038e-06,
"loss": 1.2443,
"step": 39200
},
{
"epoch": 29.29,
"learning_rate": 1.2871616010629465e-06,
"loss": 1.2114,
"step": 39210
},
{
"epoch": 29.3,
"learning_rate": 1.273321153739689e-06,
"loss": 1.2279,
"step": 39220
},
{
"epoch": 29.31,
"learning_rate": 1.2594807064164315e-06,
"loss": 1.2495,
"step": 39230
},
{
"epoch": 29.32,
"learning_rate": 1.245640259093174e-06,
"loss": 1.2409,
"step": 39240
},
{
"epoch": 29.32,
"learning_rate": 1.2317998117699166e-06,
"loss": 1.2519,
"step": 39250
},
{
"epoch": 29.33,
"learning_rate": 1.217959364446659e-06,
"loss": 1.2373,
"step": 39260
},
{
"epoch": 29.34,
"learning_rate": 1.2041189171234014e-06,
"loss": 1.2686,
"step": 39270
},
{
"epoch": 29.35,
"learning_rate": 1.190278469800144e-06,
"loss": 1.1859,
"step": 39280
},
{
"epoch": 29.35,
"learning_rate": 1.1764380224768865e-06,
"loss": 1.2415,
"step": 39290
},
{
"epoch": 29.36,
"learning_rate": 1.162597575153629e-06,
"loss": 1.2488,
"step": 39300
},
{
"epoch": 29.37,
"learning_rate": 1.1487571278303715e-06,
"loss": 1.2577,
"step": 39310
},
{
"epoch": 29.38,
"learning_rate": 1.134916680507114e-06,
"loss": 1.2265,
"step": 39320
},
{
"epoch": 29.38,
"learning_rate": 1.1210762331838566e-06,
"loss": 1.2418,
"step": 39330
},
{
"epoch": 29.39,
"learning_rate": 1.107235785860599e-06,
"loss": 1.2372,
"step": 39340
},
{
"epoch": 29.4,
"learning_rate": 1.0933953385373416e-06,
"loss": 1.2578,
"step": 39350
},
{
"epoch": 29.41,
"learning_rate": 1.0795548912140841e-06,
"loss": 1.2531,
"step": 39360
},
{
"epoch": 29.41,
"learning_rate": 1.0657144438908267e-06,
"loss": 1.1976,
"step": 39370
},
{
"epoch": 29.42,
"learning_rate": 1.0518739965675692e-06,
"loss": 1.2488,
"step": 39380
},
{
"epoch": 29.43,
"learning_rate": 1.0380335492443115e-06,
"loss": 1.2621,
"step": 39390
},
{
"epoch": 29.44,
"learning_rate": 1.024193101921054e-06,
"loss": 1.1992,
"step": 39400
},
{
"epoch": 29.44,
"learning_rate": 1.0103526545977966e-06,
"loss": 1.2491,
"step": 39410
},
{
"epoch": 29.45,
"learning_rate": 9.96512207274539e-07,
"loss": 1.2248,
"step": 39420
},
{
"epoch": 29.46,
"learning_rate": 9.826717599512816e-07,
"loss": 1.2636,
"step": 39430
},
{
"epoch": 29.47,
"learning_rate": 9.688313126280243e-07,
"loss": 1.2245,
"step": 39440
},
{
"epoch": 29.47,
"learning_rate": 9.549908653047669e-07,
"loss": 1.2123,
"step": 39450
},
{
"epoch": 29.48,
"learning_rate": 9.411504179815092e-07,
"loss": 1.2346,
"step": 39460
},
{
"epoch": 29.49,
"learning_rate": 9.273099706582517e-07,
"loss": 1.2467,
"step": 39470
},
{
"epoch": 29.5,
"learning_rate": 9.134695233349942e-07,
"loss": 1.2451,
"step": 39480
},
{
"epoch": 29.5,
"learning_rate": 8.996290760117368e-07,
"loss": 1.2892,
"step": 39490
},
{
"epoch": 29.51,
"learning_rate": 8.857886286884793e-07,
"loss": 1.2283,
"step": 39500
},
{
"epoch": 29.52,
"learning_rate": 8.719481813652218e-07,
"loss": 1.1951,
"step": 39510
},
{
"epoch": 29.53,
"learning_rate": 8.581077340419642e-07,
"loss": 1.2414,
"step": 39520
},
{
"epoch": 29.53,
"learning_rate": 8.442672867187067e-07,
"loss": 1.2134,
"step": 39530
},
{
"epoch": 29.54,
"learning_rate": 8.304268393954493e-07,
"loss": 1.2513,
"step": 39540
},
{
"epoch": 29.55,
"learning_rate": 8.165863920721918e-07,
"loss": 1.2432,
"step": 39550
},
{
"epoch": 29.56,
"learning_rate": 8.027459447489343e-07,
"loss": 1.2015,
"step": 39560
},
{
"epoch": 29.56,
"learning_rate": 7.889054974256768e-07,
"loss": 1.2062,
"step": 39570
},
{
"epoch": 29.57,
"learning_rate": 7.750650501024194e-07,
"loss": 1.2364,
"step": 39580
},
{
"epoch": 29.58,
"learning_rate": 7.612246027791619e-07,
"loss": 1.2479,
"step": 39590
},
{
"epoch": 29.59,
"learning_rate": 7.473841554559043e-07,
"loss": 1.2387,
"step": 39600
},
{
"epoch": 29.59,
"learning_rate": 7.335437081326468e-07,
"loss": 1.2539,
"step": 39610
},
{
"epoch": 29.6,
"learning_rate": 7.197032608093894e-07,
"loss": 1.2142,
"step": 39620
},
{
"epoch": 29.61,
"learning_rate": 7.05862813486132e-07,
"loss": 1.207,
"step": 39630
},
{
"epoch": 29.62,
"learning_rate": 6.920223661628744e-07,
"loss": 1.2416,
"step": 39640
},
{
"epoch": 29.62,
"learning_rate": 6.781819188396169e-07,
"loss": 1.248,
"step": 39650
},
{
"epoch": 29.63,
"learning_rate": 6.643414715163595e-07,
"loss": 1.2145,
"step": 39660
},
{
"epoch": 29.64,
"learning_rate": 6.505010241931019e-07,
"loss": 1.1678,
"step": 39670
},
{
"epoch": 29.65,
"learning_rate": 6.366605768698445e-07,
"loss": 1.2576,
"step": 39680
},
{
"epoch": 29.65,
"learning_rate": 6.22820129546587e-07,
"loss": 1.1921,
"step": 39690
},
{
"epoch": 29.66,
"learning_rate": 6.089796822233295e-07,
"loss": 1.2423,
"step": 39700
},
{
"epoch": 29.67,
"learning_rate": 5.95139234900072e-07,
"loss": 1.2407,
"step": 39710
},
{
"epoch": 29.68,
"learning_rate": 5.812987875768145e-07,
"loss": 1.2101,
"step": 39720
},
{
"epoch": 29.68,
"learning_rate": 5.67458340253557e-07,
"loss": 1.2251,
"step": 39730
},
{
"epoch": 29.69,
"learning_rate": 5.536178929302995e-07,
"loss": 1.3015,
"step": 39740
},
{
"epoch": 29.7,
"learning_rate": 5.397774456070421e-07,
"loss": 1.2355,
"step": 39750
},
{
"epoch": 29.7,
"learning_rate": 5.259369982837846e-07,
"loss": 1.2301,
"step": 39760
},
{
"epoch": 29.71,
"learning_rate": 5.12096550960527e-07,
"loss": 1.2726,
"step": 39770
},
{
"epoch": 29.72,
"learning_rate": 4.982561036372695e-07,
"loss": 1.2288,
"step": 39780
},
{
"epoch": 29.73,
"learning_rate": 4.844156563140122e-07,
"loss": 1.2378,
"step": 39790
},
{
"epoch": 29.73,
"learning_rate": 4.705752089907546e-07,
"loss": 1.2559,
"step": 39800
},
{
"epoch": 29.74,
"learning_rate": 4.567347616674971e-07,
"loss": 1.2253,
"step": 39810
},
{
"epoch": 29.75,
"learning_rate": 4.4289431434423964e-07,
"loss": 1.2589,
"step": 39820
},
{
"epoch": 29.76,
"learning_rate": 4.290538670209821e-07,
"loss": 1.2607,
"step": 39830
},
{
"epoch": 29.76,
"learning_rate": 4.1521341969772463e-07,
"loss": 1.2325,
"step": 39840
},
{
"epoch": 29.77,
"learning_rate": 4.0137297237446716e-07,
"loss": 1.2282,
"step": 39850
},
{
"epoch": 29.78,
"learning_rate": 3.875325250512097e-07,
"loss": 1.2392,
"step": 39860
},
{
"epoch": 29.79,
"learning_rate": 3.7369207772795216e-07,
"loss": 1.2243,
"step": 39870
},
{
"epoch": 29.79,
"learning_rate": 3.598516304046947e-07,
"loss": 1.2104,
"step": 39880
},
{
"epoch": 29.8,
"learning_rate": 3.460111830814372e-07,
"loss": 1.2351,
"step": 39890
},
{
"epoch": 29.81,
"learning_rate": 3.3217073575817973e-07,
"loss": 1.2352,
"step": 39900
},
{
"epoch": 29.82,
"learning_rate": 3.1833028843492225e-07,
"loss": 1.2204,
"step": 39910
},
{
"epoch": 29.82,
"learning_rate": 3.044898411116647e-07,
"loss": 1.1946,
"step": 39920
},
{
"epoch": 29.83,
"learning_rate": 2.9064939378840725e-07,
"loss": 1.2349,
"step": 39930
},
{
"epoch": 29.84,
"learning_rate": 2.768089464651498e-07,
"loss": 1.2407,
"step": 39940
},
{
"epoch": 29.85,
"learning_rate": 2.629684991418923e-07,
"loss": 1.2566,
"step": 39950
},
{
"epoch": 29.85,
"learning_rate": 2.4912805181863477e-07,
"loss": 1.1997,
"step": 39960
},
{
"epoch": 29.86,
"learning_rate": 2.352876044953773e-07,
"loss": 1.2255,
"step": 39970
},
{
"epoch": 29.87,
"learning_rate": 2.2144715717211982e-07,
"loss": 1.2385,
"step": 39980
},
{
"epoch": 29.88,
"learning_rate": 2.0760670984886232e-07,
"loss": 1.1955,
"step": 39990
},
{
"epoch": 29.88,
"learning_rate": 1.9376626252560484e-07,
"loss": 1.2404,
"step": 40000
},
{
"epoch": 29.89,
"learning_rate": 1.7992581520234734e-07,
"loss": 1.2232,
"step": 40010
},
{
"epoch": 29.9,
"learning_rate": 1.6608536787908986e-07,
"loss": 1.2222,
"step": 40020
},
{
"epoch": 29.91,
"learning_rate": 1.5224492055583236e-07,
"loss": 1.2672,
"step": 40030
},
{
"epoch": 29.91,
"learning_rate": 1.384044732325749e-07,
"loss": 1.237,
"step": 40040
},
{
"epoch": 29.92,
"learning_rate": 1.2456402590931739e-07,
"loss": 1.2306,
"step": 40050
},
{
"epoch": 29.93,
"learning_rate": 1.1072357858605991e-07,
"loss": 1.2456,
"step": 40060
},
{
"epoch": 29.94,
"learning_rate": 9.688313126280242e-08,
"loss": 1.246,
"step": 40070
},
{
"epoch": 29.94,
"learning_rate": 8.304268393954493e-08,
"loss": 1.2072,
"step": 40080
},
{
"epoch": 29.95,
"learning_rate": 6.920223661628744e-08,
"loss": 1.2481,
"step": 40090
},
{
"epoch": 29.96,
"learning_rate": 5.5361789293029955e-08,
"loss": 1.2429,
"step": 40100
},
{
"epoch": 29.97,
"learning_rate": 4.1521341969772466e-08,
"loss": 1.2238,
"step": 40110
},
{
"epoch": 29.97,
"learning_rate": 2.7680894646514977e-08,
"loss": 1.2545,
"step": 40120
},
{
"epoch": 29.98,
"learning_rate": 1.3840447323257489e-08,
"loss": 1.2447,
"step": 40130
},
{
"epoch": 29.99,
"learning_rate": 0.0,
"loss": 1.209,
"step": 40140
},
{
"epoch": 29.99,
"eval_accuracy": 0.6064394138347602,
"eval_loss": 0.9284469485282898,
"eval_runtime": 71.323,
"eval_samples_per_second": 266.941,
"eval_steps_per_second": 8.342,
"step": 40140
},
{
"epoch": 29.99,
"step": 40140,
"total_flos": 1.2772585258323601e+20,
"train_loss": 1.3304368447710997,
"train_runtime": 71351.4911,
"train_samples_per_second": 72.031,
"train_steps_per_second": 0.563
}
],
"logging_steps": 10,
"max_steps": 40140,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 1.2772585258323601e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}