tableqa_tapex_wtq / trainer_state.json
vishwajeetkumar's picture
Uploaded training args and trainer state
888688d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 56.65559872836454,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 3.0000000000000004e-07,
"loss": 14.1422,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 6.000000000000001e-07,
"loss": 9.7115,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 9e-07,
"loss": 6.8111,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 1.2000000000000002e-06,
"loss": 5.2187,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 1.5e-06,
"loss": 4.6434,
"step": 50
},
{
"epoch": 0.17,
"learning_rate": 1.8e-06,
"loss": 4.2415,
"step": 60
},
{
"epoch": 0.2,
"learning_rate": 2.1000000000000002e-06,
"loss": 3.7669,
"step": 70
},
{
"epoch": 0.23,
"learning_rate": 2.4000000000000003e-06,
"loss": 3.6312,
"step": 80
},
{
"epoch": 0.25,
"learning_rate": 2.7e-06,
"loss": 3.374,
"step": 90
},
{
"epoch": 0.28,
"learning_rate": 3e-06,
"loss": 3.1393,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 3.3e-06,
"loss": 2.9762,
"step": 110
},
{
"epoch": 0.34,
"learning_rate": 3.6e-06,
"loss": 2.9393,
"step": 120
},
{
"epoch": 0.37,
"learning_rate": 3.9e-06,
"loss": 2.814,
"step": 130
},
{
"epoch": 0.4,
"learning_rate": 4.2000000000000004e-06,
"loss": 2.7919,
"step": 140
},
{
"epoch": 0.42,
"learning_rate": 4.5e-06,
"loss": 2.7482,
"step": 150
},
{
"epoch": 0.45,
"learning_rate": 4.800000000000001e-06,
"loss": 2.7322,
"step": 160
},
{
"epoch": 0.48,
"learning_rate": 5.1e-06,
"loss": 2.6329,
"step": 170
},
{
"epoch": 0.51,
"learning_rate": 5.4e-06,
"loss": 2.5376,
"step": 180
},
{
"epoch": 0.54,
"learning_rate": 5.7000000000000005e-06,
"loss": 2.5884,
"step": 190
},
{
"epoch": 0.57,
"learning_rate": 6e-06,
"loss": 2.499,
"step": 200
},
{
"epoch": 0.59,
"learning_rate": 6.3e-06,
"loss": 2.4366,
"step": 210
},
{
"epoch": 0.62,
"learning_rate": 6.6e-06,
"loss": 2.575,
"step": 220
},
{
"epoch": 0.65,
"learning_rate": 6.900000000000001e-06,
"loss": 2.4937,
"step": 230
},
{
"epoch": 0.68,
"learning_rate": 7.2e-06,
"loss": 2.4204,
"step": 240
},
{
"epoch": 0.71,
"learning_rate": 7.5e-06,
"loss": 2.3175,
"step": 250
},
{
"epoch": 0.73,
"learning_rate": 7.8e-06,
"loss": 2.4137,
"step": 260
},
{
"epoch": 0.76,
"learning_rate": 8.1e-06,
"loss": 2.3216,
"step": 270
},
{
"epoch": 0.79,
"learning_rate": 8.400000000000001e-06,
"loss": 2.2545,
"step": 280
},
{
"epoch": 0.82,
"learning_rate": 8.7e-06,
"loss": 2.2574,
"step": 290
},
{
"epoch": 0.85,
"learning_rate": 9e-06,
"loss": 2.2952,
"step": 300
},
{
"epoch": 0.88,
"learning_rate": 9.3e-06,
"loss": 2.3082,
"step": 310
},
{
"epoch": 0.9,
"learning_rate": 9.600000000000001e-06,
"loss": 2.3056,
"step": 320
},
{
"epoch": 0.93,
"learning_rate": 9.9e-06,
"loss": 2.2697,
"step": 330
},
{
"epoch": 0.96,
"learning_rate": 1.02e-05,
"loss": 2.2188,
"step": 340
},
{
"epoch": 0.99,
"learning_rate": 1.05e-05,
"loss": 2.2091,
"step": 350
},
{
"epoch": 1.02,
"learning_rate": 1.08e-05,
"loss": 2.3702,
"step": 360
},
{
"epoch": 1.05,
"learning_rate": 1.11e-05,
"loss": 2.2407,
"step": 370
},
{
"epoch": 1.08,
"learning_rate": 1.1400000000000001e-05,
"loss": 2.1957,
"step": 380
},
{
"epoch": 1.1,
"learning_rate": 1.1700000000000001e-05,
"loss": 2.2061,
"step": 390
},
{
"epoch": 1.13,
"learning_rate": 1.2e-05,
"loss": 2.1418,
"step": 400
},
{
"epoch": 1.16,
"learning_rate": 1.2299999999999999e-05,
"loss": 2.1444,
"step": 410
},
{
"epoch": 1.19,
"learning_rate": 1.26e-05,
"loss": 2.092,
"step": 420
},
{
"epoch": 1.22,
"learning_rate": 1.29e-05,
"loss": 2.1546,
"step": 430
},
{
"epoch": 1.25,
"learning_rate": 1.32e-05,
"loss": 2.2028,
"step": 440
},
{
"epoch": 1.27,
"learning_rate": 1.3500000000000001e-05,
"loss": 2.1954,
"step": 450
},
{
"epoch": 1.3,
"learning_rate": 1.3800000000000002e-05,
"loss": 2.0972,
"step": 460
},
{
"epoch": 1.33,
"learning_rate": 1.4099999999999999e-05,
"loss": 2.1261,
"step": 470
},
{
"epoch": 1.36,
"learning_rate": 1.44e-05,
"loss": 2.2221,
"step": 480
},
{
"epoch": 1.39,
"learning_rate": 1.47e-05,
"loss": 2.164,
"step": 490
},
{
"epoch": 1.42,
"learning_rate": 1.5e-05,
"loss": 2.1335,
"step": 500
},
{
"epoch": 1.44,
"learning_rate": 1.53e-05,
"loss": 2.0772,
"step": 510
},
{
"epoch": 1.47,
"learning_rate": 1.56e-05,
"loss": 2.0248,
"step": 520
},
{
"epoch": 1.5,
"learning_rate": 1.59e-05,
"loss": 2.0828,
"step": 530
},
{
"epoch": 1.53,
"learning_rate": 1.62e-05,
"loss": 2.1405,
"step": 540
},
{
"epoch": 1.56,
"learning_rate": 1.65e-05,
"loss": 2.1052,
"step": 550
},
{
"epoch": 1.58,
"learning_rate": 1.6800000000000002e-05,
"loss": 2.0947,
"step": 560
},
{
"epoch": 1.61,
"learning_rate": 1.71e-05,
"loss": 2.1731,
"step": 570
},
{
"epoch": 1.64,
"learning_rate": 1.74e-05,
"loss": 2.0155,
"step": 580
},
{
"epoch": 1.67,
"learning_rate": 1.77e-05,
"loss": 2.0695,
"step": 590
},
{
"epoch": 1.7,
"learning_rate": 1.8e-05,
"loss": 2.1026,
"step": 600
},
{
"epoch": 1.73,
"learning_rate": 1.83e-05,
"loss": 2.1209,
"step": 610
},
{
"epoch": 1.75,
"learning_rate": 1.86e-05,
"loss": 2.0839,
"step": 620
},
{
"epoch": 1.78,
"learning_rate": 1.8900000000000002e-05,
"loss": 2.1547,
"step": 630
},
{
"epoch": 1.81,
"learning_rate": 1.9200000000000003e-05,
"loss": 2.0353,
"step": 640
},
{
"epoch": 1.84,
"learning_rate": 1.95e-05,
"loss": 2.091,
"step": 650
},
{
"epoch": 1.87,
"learning_rate": 1.98e-05,
"loss": 2.0464,
"step": 660
},
{
"epoch": 1.9,
"learning_rate": 2.01e-05,
"loss": 1.9735,
"step": 670
},
{
"epoch": 1.92,
"learning_rate": 2.04e-05,
"loss": 2.0142,
"step": 680
},
{
"epoch": 1.95,
"learning_rate": 2.07e-05,
"loss": 2.0526,
"step": 690
},
{
"epoch": 1.98,
"learning_rate": 2.1e-05,
"loss": 2.1198,
"step": 700
},
{
"epoch": 2.01,
"learning_rate": 2.13e-05,
"loss": 2.1764,
"step": 710
},
{
"epoch": 2.04,
"learning_rate": 2.16e-05,
"loss": 1.9925,
"step": 720
},
{
"epoch": 2.07,
"learning_rate": 2.19e-05,
"loss": 1.9689,
"step": 730
},
{
"epoch": 2.1,
"learning_rate": 2.22e-05,
"loss": 1.8489,
"step": 740
},
{
"epoch": 2.12,
"learning_rate": 2.25e-05,
"loss": 1.9314,
"step": 750
},
{
"epoch": 2.15,
"learning_rate": 2.2800000000000002e-05,
"loss": 1.8814,
"step": 760
},
{
"epoch": 2.18,
"learning_rate": 2.3100000000000002e-05,
"loss": 1.9512,
"step": 770
},
{
"epoch": 2.21,
"learning_rate": 2.3400000000000003e-05,
"loss": 1.9251,
"step": 780
},
{
"epoch": 2.24,
"learning_rate": 2.37e-05,
"loss": 1.9665,
"step": 790
},
{
"epoch": 2.27,
"learning_rate": 2.4e-05,
"loss": 2.0072,
"step": 800
},
{
"epoch": 2.29,
"learning_rate": 2.43e-05,
"loss": 2.0209,
"step": 810
},
{
"epoch": 2.32,
"learning_rate": 2.4599999999999998e-05,
"loss": 1.9674,
"step": 820
},
{
"epoch": 2.35,
"learning_rate": 2.49e-05,
"loss": 1.895,
"step": 830
},
{
"epoch": 2.38,
"learning_rate": 2.52e-05,
"loss": 1.9893,
"step": 840
},
{
"epoch": 2.41,
"learning_rate": 2.55e-05,
"loss": 2.0037,
"step": 850
},
{
"epoch": 2.44,
"learning_rate": 2.58e-05,
"loss": 2.0346,
"step": 860
},
{
"epoch": 2.46,
"learning_rate": 2.61e-05,
"loss": 2.0051,
"step": 870
},
{
"epoch": 2.49,
"learning_rate": 2.64e-05,
"loss": 2.0522,
"step": 880
},
{
"epoch": 2.52,
"learning_rate": 2.6700000000000002e-05,
"loss": 1.984,
"step": 890
},
{
"epoch": 2.55,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.9341,
"step": 900
},
{
"epoch": 2.58,
"learning_rate": 2.7300000000000003e-05,
"loss": 2.052,
"step": 910
},
{
"epoch": 2.6,
"learning_rate": 2.7600000000000003e-05,
"loss": 2.0514,
"step": 920
},
{
"epoch": 2.63,
"learning_rate": 2.79e-05,
"loss": 2.0879,
"step": 930
},
{
"epoch": 2.66,
"learning_rate": 2.8199999999999998e-05,
"loss": 2.0271,
"step": 940
},
{
"epoch": 2.69,
"learning_rate": 2.8499999999999998e-05,
"loss": 2.022,
"step": 950
},
{
"epoch": 2.72,
"learning_rate": 2.88e-05,
"loss": 1.9329,
"step": 960
},
{
"epoch": 2.75,
"learning_rate": 2.91e-05,
"loss": 2.0669,
"step": 970
},
{
"epoch": 2.77,
"learning_rate": 2.94e-05,
"loss": 1.9011,
"step": 980
},
{
"epoch": 2.8,
"learning_rate": 2.97e-05,
"loss": 1.9225,
"step": 990
},
{
"epoch": 2.83,
"learning_rate": 3e-05,
"loss": 1.9818,
"step": 1000
},
{
"epoch": 2.83,
"eval_denotation_accuracy": 0.5192511480042388,
"eval_loss": 2.01488995552063,
"eval_runtime": 563.2088,
"eval_samples_per_second": 5.027,
"eval_steps_per_second": 1.257,
"step": 1000
},
{
"epoch": 2.86,
"learning_rate": 2.998421052631579e-05,
"loss": 1.9532,
"step": 1010
},
{
"epoch": 2.89,
"learning_rate": 2.9968421052631578e-05,
"loss": 1.9891,
"step": 1020
},
{
"epoch": 2.92,
"learning_rate": 2.9952631578947372e-05,
"loss": 1.9292,
"step": 1030
},
{
"epoch": 2.94,
"learning_rate": 2.993684210526316e-05,
"loss": 2.0779,
"step": 1040
},
{
"epoch": 2.97,
"learning_rate": 2.992105263157895e-05,
"loss": 1.9649,
"step": 1050
},
{
"epoch": 3.0,
"learning_rate": 2.9905263157894736e-05,
"loss": 2.1161,
"step": 1060
},
{
"epoch": 3.03,
"learning_rate": 2.988947368421053e-05,
"loss": 1.8847,
"step": 1070
},
{
"epoch": 3.06,
"learning_rate": 2.9873684210526317e-05,
"loss": 1.8701,
"step": 1080
},
{
"epoch": 3.09,
"learning_rate": 2.9857894736842107e-05,
"loss": 1.9537,
"step": 1090
},
{
"epoch": 3.12,
"learning_rate": 2.9842105263157894e-05,
"loss": 1.8578,
"step": 1100
},
{
"epoch": 3.14,
"learning_rate": 2.9826315789473684e-05,
"loss": 1.8608,
"step": 1110
},
{
"epoch": 3.17,
"learning_rate": 2.9810526315789475e-05,
"loss": 1.8986,
"step": 1120
},
{
"epoch": 3.2,
"learning_rate": 2.9794736842105265e-05,
"loss": 1.8624,
"step": 1130
},
{
"epoch": 3.23,
"learning_rate": 2.9778947368421052e-05,
"loss": 1.9263,
"step": 1140
},
{
"epoch": 3.26,
"learning_rate": 2.9763157894736842e-05,
"loss": 1.8791,
"step": 1150
},
{
"epoch": 3.29,
"learning_rate": 2.974736842105263e-05,
"loss": 1.8547,
"step": 1160
},
{
"epoch": 3.31,
"learning_rate": 2.9731578947368423e-05,
"loss": 1.8435,
"step": 1170
},
{
"epoch": 3.34,
"learning_rate": 2.9715789473684213e-05,
"loss": 1.9111,
"step": 1180
},
{
"epoch": 3.37,
"learning_rate": 2.97e-05,
"loss": 1.8597,
"step": 1190
},
{
"epoch": 3.4,
"learning_rate": 2.968421052631579e-05,
"loss": 1.872,
"step": 1200
},
{
"epoch": 3.43,
"learning_rate": 2.966842105263158e-05,
"loss": 1.861,
"step": 1210
},
{
"epoch": 3.45,
"learning_rate": 2.965263157894737e-05,
"loss": 1.8506,
"step": 1220
},
{
"epoch": 3.48,
"learning_rate": 2.9636842105263158e-05,
"loss": 1.9002,
"step": 1230
},
{
"epoch": 3.51,
"learning_rate": 2.962105263157895e-05,
"loss": 1.7997,
"step": 1240
},
{
"epoch": 3.54,
"learning_rate": 2.9605263157894735e-05,
"loss": 1.846,
"step": 1250
},
{
"epoch": 3.57,
"learning_rate": 2.958947368421053e-05,
"loss": 1.9717,
"step": 1260
},
{
"epoch": 3.6,
"learning_rate": 2.9573684210526316e-05,
"loss": 1.8678,
"step": 1270
},
{
"epoch": 3.62,
"learning_rate": 2.9557894736842107e-05,
"loss": 1.8516,
"step": 1280
},
{
"epoch": 3.65,
"learning_rate": 2.9542105263157893e-05,
"loss": 1.8293,
"step": 1290
},
{
"epoch": 3.68,
"learning_rate": 2.9526315789473684e-05,
"loss": 1.9229,
"step": 1300
},
{
"epoch": 3.71,
"learning_rate": 2.9510526315789474e-05,
"loss": 1.9239,
"step": 1310
},
{
"epoch": 3.74,
"learning_rate": 2.9494736842105264e-05,
"loss": 1.8524,
"step": 1320
},
{
"epoch": 3.77,
"learning_rate": 2.947894736842105e-05,
"loss": 1.9316,
"step": 1330
},
{
"epoch": 3.79,
"learning_rate": 2.9463157894736842e-05,
"loss": 1.8367,
"step": 1340
},
{
"epoch": 3.82,
"learning_rate": 2.9447368421052635e-05,
"loss": 1.8685,
"step": 1350
},
{
"epoch": 3.85,
"learning_rate": 2.9431578947368422e-05,
"loss": 1.8507,
"step": 1360
},
{
"epoch": 3.88,
"learning_rate": 2.9415789473684213e-05,
"loss": 1.8836,
"step": 1370
},
{
"epoch": 3.91,
"learning_rate": 2.94e-05,
"loss": 1.9116,
"step": 1380
},
{
"epoch": 3.94,
"learning_rate": 2.938421052631579e-05,
"loss": 1.8836,
"step": 1390
},
{
"epoch": 3.96,
"learning_rate": 2.936842105263158e-05,
"loss": 1.8195,
"step": 1400
},
{
"epoch": 3.99,
"learning_rate": 2.935263157894737e-05,
"loss": 1.9163,
"step": 1410
},
{
"epoch": 4.02,
"learning_rate": 2.9336842105263158e-05,
"loss": 1.912,
"step": 1420
},
{
"epoch": 4.05,
"learning_rate": 2.9321052631578948e-05,
"loss": 1.7437,
"step": 1430
},
{
"epoch": 4.08,
"learning_rate": 2.9305263157894735e-05,
"loss": 1.7872,
"step": 1440
},
{
"epoch": 4.11,
"learning_rate": 2.928947368421053e-05,
"loss": 1.7588,
"step": 1450
},
{
"epoch": 4.14,
"learning_rate": 2.9273684210526316e-05,
"loss": 1.7983,
"step": 1460
},
{
"epoch": 4.16,
"learning_rate": 2.9257894736842106e-05,
"loss": 1.8219,
"step": 1470
},
{
"epoch": 4.19,
"learning_rate": 2.9242105263157893e-05,
"loss": 1.8267,
"step": 1480
},
{
"epoch": 4.22,
"learning_rate": 2.9226315789473687e-05,
"loss": 1.8059,
"step": 1490
},
{
"epoch": 4.25,
"learning_rate": 2.9210526315789474e-05,
"loss": 1.7878,
"step": 1500
},
{
"epoch": 4.28,
"learning_rate": 2.9194736842105264e-05,
"loss": 1.8023,
"step": 1510
},
{
"epoch": 4.31,
"learning_rate": 2.9178947368421054e-05,
"loss": 1.7739,
"step": 1520
},
{
"epoch": 4.33,
"learning_rate": 2.916315789473684e-05,
"loss": 1.7929,
"step": 1530
},
{
"epoch": 4.36,
"learning_rate": 2.9147368421052635e-05,
"loss": 1.8113,
"step": 1540
},
{
"epoch": 4.39,
"learning_rate": 2.9131578947368422e-05,
"loss": 1.811,
"step": 1550
},
{
"epoch": 4.42,
"learning_rate": 2.9115789473684212e-05,
"loss": 1.7617,
"step": 1560
},
{
"epoch": 4.45,
"learning_rate": 2.91e-05,
"loss": 1.837,
"step": 1570
},
{
"epoch": 4.47,
"learning_rate": 2.908421052631579e-05,
"loss": 1.8516,
"step": 1580
},
{
"epoch": 4.5,
"learning_rate": 2.906842105263158e-05,
"loss": 1.8067,
"step": 1590
},
{
"epoch": 4.53,
"learning_rate": 2.905263157894737e-05,
"loss": 1.7737,
"step": 1600
},
{
"epoch": 4.56,
"learning_rate": 2.9036842105263157e-05,
"loss": 1.789,
"step": 1610
},
{
"epoch": 4.59,
"learning_rate": 2.9021052631578948e-05,
"loss": 1.8488,
"step": 1620
},
{
"epoch": 4.62,
"learning_rate": 2.9005263157894738e-05,
"loss": 1.8083,
"step": 1630
},
{
"epoch": 4.64,
"learning_rate": 2.8989473684210528e-05,
"loss": 1.8181,
"step": 1640
},
{
"epoch": 4.67,
"learning_rate": 2.8973684210526315e-05,
"loss": 1.7633,
"step": 1650
},
{
"epoch": 4.7,
"learning_rate": 2.8957894736842105e-05,
"loss": 1.7521,
"step": 1660
},
{
"epoch": 4.73,
"learning_rate": 2.8942105263157896e-05,
"loss": 1.7865,
"step": 1670
},
{
"epoch": 4.76,
"learning_rate": 2.8926315789473686e-05,
"loss": 1.7808,
"step": 1680
},
{
"epoch": 4.79,
"learning_rate": 2.8910526315789476e-05,
"loss": 1.9234,
"step": 1690
},
{
"epoch": 4.81,
"learning_rate": 2.8894736842105263e-05,
"loss": 1.7825,
"step": 1700
},
{
"epoch": 4.84,
"learning_rate": 2.8878947368421054e-05,
"loss": 1.7962,
"step": 1710
},
{
"epoch": 4.87,
"learning_rate": 2.886315789473684e-05,
"loss": 1.7204,
"step": 1720
},
{
"epoch": 4.9,
"learning_rate": 2.8847368421052634e-05,
"loss": 1.7531,
"step": 1730
},
{
"epoch": 4.93,
"learning_rate": 2.883157894736842e-05,
"loss": 1.772,
"step": 1740
},
{
"epoch": 4.96,
"learning_rate": 2.8815789473684212e-05,
"loss": 1.8145,
"step": 1750
},
{
"epoch": 4.98,
"learning_rate": 2.88e-05,
"loss": 1.8345,
"step": 1760
},
{
"epoch": 5.01,
"learning_rate": 2.8784210526315792e-05,
"loss": 1.8886,
"step": 1770
},
{
"epoch": 5.04,
"learning_rate": 2.876842105263158e-05,
"loss": 1.7124,
"step": 1780
},
{
"epoch": 5.07,
"learning_rate": 2.875263157894737e-05,
"loss": 1.7388,
"step": 1790
},
{
"epoch": 5.1,
"learning_rate": 2.8736842105263157e-05,
"loss": 1.7866,
"step": 1800
},
{
"epoch": 5.13,
"learning_rate": 2.8721052631578947e-05,
"loss": 1.7238,
"step": 1810
},
{
"epoch": 5.16,
"learning_rate": 2.8705263157894737e-05,
"loss": 1.687,
"step": 1820
},
{
"epoch": 5.18,
"learning_rate": 2.8689473684210528e-05,
"loss": 1.7261,
"step": 1830
},
{
"epoch": 5.21,
"learning_rate": 2.8673684210526318e-05,
"loss": 1.704,
"step": 1840
},
{
"epoch": 5.24,
"learning_rate": 2.8657894736842105e-05,
"loss": 1.7616,
"step": 1850
},
{
"epoch": 5.27,
"learning_rate": 2.8642105263157895e-05,
"loss": 1.7323,
"step": 1860
},
{
"epoch": 5.3,
"learning_rate": 2.8626315789473686e-05,
"loss": 1.8417,
"step": 1870
},
{
"epoch": 5.32,
"learning_rate": 2.8610526315789476e-05,
"loss": 1.7533,
"step": 1880
},
{
"epoch": 5.35,
"learning_rate": 2.8594736842105263e-05,
"loss": 1.7302,
"step": 1890
},
{
"epoch": 5.38,
"learning_rate": 2.8578947368421053e-05,
"loss": 1.7701,
"step": 1900
},
{
"epoch": 5.41,
"learning_rate": 2.8563157894736844e-05,
"loss": 1.8397,
"step": 1910
},
{
"epoch": 5.44,
"learning_rate": 2.8547368421052634e-05,
"loss": 1.7497,
"step": 1920
},
{
"epoch": 5.47,
"learning_rate": 2.853157894736842e-05,
"loss": 1.7129,
"step": 1930
},
{
"epoch": 5.49,
"learning_rate": 2.851578947368421e-05,
"loss": 1.7424,
"step": 1940
},
{
"epoch": 5.52,
"learning_rate": 2.8499999999999998e-05,
"loss": 1.7278,
"step": 1950
},
{
"epoch": 5.55,
"learning_rate": 2.8484210526315792e-05,
"loss": 1.7791,
"step": 1960
},
{
"epoch": 5.58,
"learning_rate": 2.846842105263158e-05,
"loss": 1.7271,
"step": 1970
},
{
"epoch": 5.61,
"learning_rate": 2.845263157894737e-05,
"loss": 1.7126,
"step": 1980
},
{
"epoch": 5.64,
"learning_rate": 2.8436842105263156e-05,
"loss": 1.6985,
"step": 1990
},
{
"epoch": 5.66,
"learning_rate": 2.8421052631578946e-05,
"loss": 1.7464,
"step": 2000
},
{
"epoch": 5.66,
"eval_denotation_accuracy": 0.5524549629106322,
"eval_loss": 1.9903446435928345,
"eval_runtime": 409.056,
"eval_samples_per_second": 6.921,
"eval_steps_per_second": 1.731,
"step": 2000
},
{
"epoch": 5.69,
"learning_rate": 2.840526315789474e-05,
"loss": 1.8094,
"step": 2010
},
{
"epoch": 5.72,
"learning_rate": 2.8389473684210527e-05,
"loss": 1.6919,
"step": 2020
},
{
"epoch": 5.75,
"learning_rate": 2.8373684210526317e-05,
"loss": 1.7333,
"step": 2030
},
{
"epoch": 5.78,
"learning_rate": 2.8357894736842104e-05,
"loss": 1.7074,
"step": 2040
},
{
"epoch": 5.81,
"learning_rate": 2.8342105263157898e-05,
"loss": 1.6483,
"step": 2050
},
{
"epoch": 5.83,
"learning_rate": 2.8326315789473685e-05,
"loss": 1.69,
"step": 2060
},
{
"epoch": 5.86,
"learning_rate": 2.8310526315789475e-05,
"loss": 1.7012,
"step": 2070
},
{
"epoch": 5.89,
"learning_rate": 2.8294736842105262e-05,
"loss": 1.7322,
"step": 2080
},
{
"epoch": 5.92,
"learning_rate": 2.8278947368421053e-05,
"loss": 1.7568,
"step": 2090
},
{
"epoch": 5.95,
"learning_rate": 2.8263157894736843e-05,
"loss": 1.7373,
"step": 2100
},
{
"epoch": 5.97,
"learning_rate": 2.8247368421052633e-05,
"loss": 1.7581,
"step": 2110
},
{
"epoch": 6.01,
"learning_rate": 2.823157894736842e-05,
"loss": 1.869,
"step": 2120
},
{
"epoch": 6.03,
"learning_rate": 2.821578947368421e-05,
"loss": 1.7298,
"step": 2130
},
{
"epoch": 6.06,
"learning_rate": 2.8199999999999998e-05,
"loss": 1.6599,
"step": 2140
},
{
"epoch": 6.09,
"learning_rate": 2.818421052631579e-05,
"loss": 1.7136,
"step": 2150
},
{
"epoch": 6.12,
"learning_rate": 2.816842105263158e-05,
"loss": 1.6803,
"step": 2160
},
{
"epoch": 6.15,
"learning_rate": 2.815263157894737e-05,
"loss": 1.6567,
"step": 2170
},
{
"epoch": 6.18,
"learning_rate": 2.813684210526316e-05,
"loss": 1.6761,
"step": 2180
},
{
"epoch": 6.2,
"learning_rate": 2.812105263157895e-05,
"loss": 1.6549,
"step": 2190
},
{
"epoch": 6.23,
"learning_rate": 2.810526315789474e-05,
"loss": 1.6628,
"step": 2200
},
{
"epoch": 6.26,
"learning_rate": 2.8089473684210527e-05,
"loss": 1.7003,
"step": 2210
},
{
"epoch": 6.29,
"learning_rate": 2.8073684210526317e-05,
"loss": 1.7405,
"step": 2220
},
{
"epoch": 6.32,
"learning_rate": 2.8057894736842104e-05,
"loss": 1.741,
"step": 2230
},
{
"epoch": 6.34,
"learning_rate": 2.8042105263157898e-05,
"loss": 1.6976,
"step": 2240
},
{
"epoch": 6.37,
"learning_rate": 2.8026315789473685e-05,
"loss": 1.6678,
"step": 2250
},
{
"epoch": 6.4,
"learning_rate": 2.8010526315789475e-05,
"loss": 1.688,
"step": 2260
},
{
"epoch": 6.43,
"learning_rate": 2.7994736842105262e-05,
"loss": 1.6566,
"step": 2270
},
{
"epoch": 6.46,
"learning_rate": 2.7978947368421052e-05,
"loss": 1.6851,
"step": 2280
},
{
"epoch": 6.49,
"learning_rate": 2.7963157894736843e-05,
"loss": 1.6712,
"step": 2290
},
{
"epoch": 6.51,
"learning_rate": 2.7947368421052633e-05,
"loss": 1.6816,
"step": 2300
},
{
"epoch": 6.54,
"learning_rate": 2.793157894736842e-05,
"loss": 1.6864,
"step": 2310
},
{
"epoch": 6.57,
"learning_rate": 2.791578947368421e-05,
"loss": 1.7062,
"step": 2320
},
{
"epoch": 6.6,
"learning_rate": 2.79e-05,
"loss": 1.6764,
"step": 2330
},
{
"epoch": 6.63,
"learning_rate": 2.788421052631579e-05,
"loss": 1.6874,
"step": 2340
},
{
"epoch": 6.66,
"learning_rate": 2.786842105263158e-05,
"loss": 1.6939,
"step": 2350
},
{
"epoch": 6.68,
"learning_rate": 2.7852631578947368e-05,
"loss": 1.6551,
"step": 2360
},
{
"epoch": 6.71,
"learning_rate": 2.783684210526316e-05,
"loss": 1.6876,
"step": 2370
},
{
"epoch": 6.74,
"learning_rate": 2.782105263157895e-05,
"loss": 1.6794,
"step": 2380
},
{
"epoch": 6.77,
"learning_rate": 2.780526315789474e-05,
"loss": 1.6693,
"step": 2390
},
{
"epoch": 6.8,
"learning_rate": 2.7789473684210526e-05,
"loss": 1.7284,
"step": 2400
},
{
"epoch": 6.83,
"learning_rate": 2.7773684210526316e-05,
"loss": 1.7203,
"step": 2410
},
{
"epoch": 6.85,
"learning_rate": 2.7757894736842103e-05,
"loss": 1.7067,
"step": 2420
},
{
"epoch": 6.88,
"learning_rate": 2.7742105263157897e-05,
"loss": 1.6756,
"step": 2430
},
{
"epoch": 6.91,
"learning_rate": 2.7726315789473684e-05,
"loss": 1.7367,
"step": 2440
},
{
"epoch": 6.94,
"learning_rate": 2.7710526315789474e-05,
"loss": 1.6639,
"step": 2450
},
{
"epoch": 6.97,
"learning_rate": 2.769473684210526e-05,
"loss": 1.7266,
"step": 2460
},
{
"epoch": 6.99,
"learning_rate": 2.7678947368421055e-05,
"loss": 1.696,
"step": 2470
},
{
"epoch": 7.03,
"learning_rate": 2.7663157894736842e-05,
"loss": 1.7717,
"step": 2480
},
{
"epoch": 7.05,
"learning_rate": 2.7647368421052632e-05,
"loss": 1.6731,
"step": 2490
},
{
"epoch": 7.08,
"learning_rate": 2.7631578947368423e-05,
"loss": 1.6213,
"step": 2500
},
{
"epoch": 7.11,
"learning_rate": 2.761578947368421e-05,
"loss": 1.6132,
"step": 2510
},
{
"epoch": 7.14,
"learning_rate": 2.7600000000000003e-05,
"loss": 1.6349,
"step": 2520
},
{
"epoch": 7.17,
"learning_rate": 2.758421052631579e-05,
"loss": 1.6066,
"step": 2530
},
{
"epoch": 7.19,
"learning_rate": 2.756842105263158e-05,
"loss": 1.5981,
"step": 2540
},
{
"epoch": 7.22,
"learning_rate": 2.7552631578947368e-05,
"loss": 1.6642,
"step": 2550
},
{
"epoch": 7.25,
"learning_rate": 2.7536842105263158e-05,
"loss": 1.6323,
"step": 2560
},
{
"epoch": 7.28,
"learning_rate": 2.752105263157895e-05,
"loss": 1.689,
"step": 2570
},
{
"epoch": 7.31,
"learning_rate": 2.750526315789474e-05,
"loss": 1.6511,
"step": 2580
},
{
"epoch": 7.34,
"learning_rate": 2.7489473684210526e-05,
"loss": 1.6616,
"step": 2590
},
{
"epoch": 7.36,
"learning_rate": 2.7473684210526316e-05,
"loss": 1.6584,
"step": 2600
},
{
"epoch": 7.39,
"learning_rate": 2.7457894736842106e-05,
"loss": 1.6503,
"step": 2610
},
{
"epoch": 7.42,
"learning_rate": 2.7442105263157897e-05,
"loss": 1.6598,
"step": 2620
},
{
"epoch": 7.45,
"learning_rate": 2.7426315789473684e-05,
"loss": 1.6312,
"step": 2630
},
{
"epoch": 7.48,
"learning_rate": 2.7410526315789474e-05,
"loss": 1.6501,
"step": 2640
},
{
"epoch": 7.51,
"learning_rate": 2.739473684210526e-05,
"loss": 1.708,
"step": 2650
},
{
"epoch": 7.53,
"learning_rate": 2.7378947368421055e-05,
"loss": 1.6449,
"step": 2660
},
{
"epoch": 7.56,
"learning_rate": 2.7363157894736845e-05,
"loss": 1.7256,
"step": 2670
},
{
"epoch": 7.59,
"learning_rate": 2.7347368421052632e-05,
"loss": 1.6688,
"step": 2680
},
{
"epoch": 7.62,
"learning_rate": 2.7331578947368422e-05,
"loss": 1.6438,
"step": 2690
},
{
"epoch": 7.65,
"learning_rate": 2.7315789473684213e-05,
"loss": 1.6481,
"step": 2700
},
{
"epoch": 7.68,
"learning_rate": 2.7300000000000003e-05,
"loss": 1.6991,
"step": 2710
},
{
"epoch": 7.7,
"learning_rate": 2.728421052631579e-05,
"loss": 1.6602,
"step": 2720
},
{
"epoch": 7.73,
"learning_rate": 2.726842105263158e-05,
"loss": 1.6537,
"step": 2730
},
{
"epoch": 7.76,
"learning_rate": 2.7252631578947367e-05,
"loss": 1.6564,
"step": 2740
},
{
"epoch": 7.79,
"learning_rate": 2.723684210526316e-05,
"loss": 1.6607,
"step": 2750
},
{
"epoch": 7.82,
"learning_rate": 2.7221052631578948e-05,
"loss": 1.6228,
"step": 2760
},
{
"epoch": 7.84,
"learning_rate": 2.7205263157894738e-05,
"loss": 1.6732,
"step": 2770
},
{
"epoch": 7.87,
"learning_rate": 2.7189473684210525e-05,
"loss": 1.7004,
"step": 2780
},
{
"epoch": 7.9,
"learning_rate": 2.7173684210526315e-05,
"loss": 1.678,
"step": 2790
},
{
"epoch": 7.93,
"learning_rate": 2.7157894736842106e-05,
"loss": 1.6379,
"step": 2800
},
{
"epoch": 7.96,
"learning_rate": 2.7142105263157896e-05,
"loss": 1.6475,
"step": 2810
},
{
"epoch": 7.99,
"learning_rate": 2.7126315789473683e-05,
"loss": 1.6697,
"step": 2820
},
{
"epoch": 8.02,
"learning_rate": 2.7110526315789473e-05,
"loss": 1.7948,
"step": 2830
},
{
"epoch": 8.05,
"learning_rate": 2.7094736842105267e-05,
"loss": 1.6428,
"step": 2840
},
{
"epoch": 8.07,
"learning_rate": 2.7078947368421054e-05,
"loss": 1.6928,
"step": 2850
},
{
"epoch": 8.1,
"learning_rate": 2.7063157894736844e-05,
"loss": 1.6174,
"step": 2860
},
{
"epoch": 8.13,
"learning_rate": 2.704736842105263e-05,
"loss": 1.6166,
"step": 2870
},
{
"epoch": 8.16,
"learning_rate": 2.7031578947368422e-05,
"loss": 1.6341,
"step": 2880
},
{
"epoch": 8.19,
"learning_rate": 2.7015789473684212e-05,
"loss": 1.6341,
"step": 2890
},
{
"epoch": 8.21,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.6289,
"step": 2900
},
{
"epoch": 8.24,
"learning_rate": 2.698421052631579e-05,
"loss": 1.6076,
"step": 2910
},
{
"epoch": 8.27,
"learning_rate": 2.696842105263158e-05,
"loss": 1.6466,
"step": 2920
},
{
"epoch": 8.3,
"learning_rate": 2.6952631578947367e-05,
"loss": 1.6026,
"step": 2930
},
{
"epoch": 8.33,
"learning_rate": 2.693684210526316e-05,
"loss": 1.5972,
"step": 2940
},
{
"epoch": 8.36,
"learning_rate": 2.6921052631578947e-05,
"loss": 1.6079,
"step": 2950
},
{
"epoch": 8.38,
"learning_rate": 2.6905263157894738e-05,
"loss": 1.6655,
"step": 2960
},
{
"epoch": 8.41,
"learning_rate": 2.6889473684210525e-05,
"loss": 1.6134,
"step": 2970
},
{
"epoch": 8.44,
"learning_rate": 2.687368421052632e-05,
"loss": 1.6399,
"step": 2980
},
{
"epoch": 8.47,
"learning_rate": 2.6857894736842105e-05,
"loss": 1.6358,
"step": 2990
},
{
"epoch": 8.5,
"learning_rate": 2.6842105263157896e-05,
"loss": 1.6447,
"step": 3000
},
{
"epoch": 8.5,
"eval_denotation_accuracy": 0.5637583892617449,
"eval_loss": 2.023669958114624,
"eval_runtime": 415.5806,
"eval_samples_per_second": 6.812,
"eval_steps_per_second": 1.704,
"step": 3000
},
{
"epoch": 8.53,
"learning_rate": 2.6826315789473686e-05,
"loss": 1.6034,
"step": 3010
},
{
"epoch": 8.55,
"learning_rate": 2.6810526315789473e-05,
"loss": 1.5992,
"step": 3020
},
{
"epoch": 8.58,
"learning_rate": 2.6794736842105267e-05,
"loss": 1.6025,
"step": 3030
},
{
"epoch": 8.61,
"learning_rate": 2.6778947368421054e-05,
"loss": 1.6258,
"step": 3040
},
{
"epoch": 8.64,
"learning_rate": 2.6763157894736844e-05,
"loss": 1.6088,
"step": 3050
},
{
"epoch": 8.67,
"learning_rate": 2.674736842105263e-05,
"loss": 1.6179,
"step": 3060
},
{
"epoch": 8.7,
"learning_rate": 2.673157894736842e-05,
"loss": 1.6292,
"step": 3070
},
{
"epoch": 8.72,
"learning_rate": 2.671578947368421e-05,
"loss": 1.6844,
"step": 3080
},
{
"epoch": 8.75,
"learning_rate": 2.6700000000000002e-05,
"loss": 1.6472,
"step": 3090
},
{
"epoch": 8.78,
"learning_rate": 2.668421052631579e-05,
"loss": 1.6238,
"step": 3100
},
{
"epoch": 8.81,
"learning_rate": 2.666842105263158e-05,
"loss": 1.6499,
"step": 3110
},
{
"epoch": 8.84,
"learning_rate": 2.665263157894737e-05,
"loss": 1.6397,
"step": 3120
},
{
"epoch": 8.86,
"learning_rate": 2.663684210526316e-05,
"loss": 1.6594,
"step": 3130
},
{
"epoch": 8.89,
"learning_rate": 2.6621052631578947e-05,
"loss": 1.6612,
"step": 3140
},
{
"epoch": 8.92,
"learning_rate": 2.6605263157894737e-05,
"loss": 1.6247,
"step": 3150
},
{
"epoch": 8.95,
"learning_rate": 2.6589473684210524e-05,
"loss": 1.633,
"step": 3160
},
{
"epoch": 8.98,
"learning_rate": 2.6573684210526318e-05,
"loss": 1.6364,
"step": 3170
},
{
"epoch": 9.01,
"learning_rate": 2.6557894736842108e-05,
"loss": 1.7977,
"step": 3180
},
{
"epoch": 9.04,
"learning_rate": 2.6542105263157895e-05,
"loss": 1.5909,
"step": 3190
},
{
"epoch": 9.06,
"learning_rate": 2.6526315789473685e-05,
"loss": 1.5751,
"step": 3200
},
{
"epoch": 9.09,
"learning_rate": 2.6510526315789472e-05,
"loss": 1.5807,
"step": 3210
},
{
"epoch": 9.12,
"learning_rate": 2.6494736842105266e-05,
"loss": 1.5911,
"step": 3220
},
{
"epoch": 9.15,
"learning_rate": 2.6478947368421053e-05,
"loss": 1.6176,
"step": 3230
},
{
"epoch": 9.18,
"learning_rate": 2.6463157894736843e-05,
"loss": 1.5872,
"step": 3240
},
{
"epoch": 9.21,
"learning_rate": 2.644736842105263e-05,
"loss": 1.5697,
"step": 3250
},
{
"epoch": 9.23,
"learning_rate": 2.6431578947368424e-05,
"loss": 1.5871,
"step": 3260
},
{
"epoch": 9.26,
"learning_rate": 2.641578947368421e-05,
"loss": 1.6368,
"step": 3270
},
{
"epoch": 9.29,
"learning_rate": 2.64e-05,
"loss": 1.593,
"step": 3280
},
{
"epoch": 9.32,
"learning_rate": 2.6384210526315788e-05,
"loss": 1.577,
"step": 3290
},
{
"epoch": 9.35,
"learning_rate": 2.636842105263158e-05,
"loss": 1.6126,
"step": 3300
},
{
"epoch": 9.38,
"learning_rate": 2.635263157894737e-05,
"loss": 1.6005,
"step": 3310
},
{
"epoch": 9.4,
"learning_rate": 2.633684210526316e-05,
"loss": 1.6161,
"step": 3320
},
{
"epoch": 9.43,
"learning_rate": 2.6321052631578946e-05,
"loss": 1.6188,
"step": 3330
},
{
"epoch": 9.46,
"learning_rate": 2.6305263157894737e-05,
"loss": 1.6316,
"step": 3340
},
{
"epoch": 9.49,
"learning_rate": 2.6289473684210527e-05,
"loss": 1.6132,
"step": 3350
},
{
"epoch": 9.52,
"learning_rate": 2.6273684210526317e-05,
"loss": 1.6491,
"step": 3360
},
{
"epoch": 9.55,
"learning_rate": 2.6257894736842108e-05,
"loss": 1.6004,
"step": 3370
},
{
"epoch": 9.57,
"learning_rate": 2.6242105263157895e-05,
"loss": 1.6161,
"step": 3380
},
{
"epoch": 9.6,
"learning_rate": 2.6226315789473685e-05,
"loss": 1.607,
"step": 3390
},
{
"epoch": 9.63,
"learning_rate": 2.6210526315789475e-05,
"loss": 1.6207,
"step": 3400
},
{
"epoch": 9.66,
"learning_rate": 2.6194736842105266e-05,
"loss": 1.6111,
"step": 3410
},
{
"epoch": 9.69,
"learning_rate": 2.6178947368421053e-05,
"loss": 1.5797,
"step": 3420
},
{
"epoch": 9.71,
"learning_rate": 2.6163157894736843e-05,
"loss": 1.5955,
"step": 3430
},
{
"epoch": 9.74,
"learning_rate": 2.614736842105263e-05,
"loss": 1.6303,
"step": 3440
},
{
"epoch": 9.77,
"learning_rate": 2.6131578947368424e-05,
"loss": 1.6379,
"step": 3450
},
{
"epoch": 9.8,
"learning_rate": 2.611578947368421e-05,
"loss": 1.6016,
"step": 3460
},
{
"epoch": 9.83,
"learning_rate": 2.61e-05,
"loss": 1.6299,
"step": 3470
},
{
"epoch": 9.86,
"learning_rate": 2.6084210526315788e-05,
"loss": 1.5963,
"step": 3480
},
{
"epoch": 9.88,
"learning_rate": 2.6068421052631578e-05,
"loss": 1.5872,
"step": 3490
},
{
"epoch": 9.91,
"learning_rate": 2.605263157894737e-05,
"loss": 1.626,
"step": 3500
},
{
"epoch": 9.94,
"learning_rate": 2.603684210526316e-05,
"loss": 1.5876,
"step": 3510
},
{
"epoch": 9.97,
"learning_rate": 2.602105263157895e-05,
"loss": 1.6061,
"step": 3520
},
{
"epoch": 10.0,
"learning_rate": 2.6005263157894736e-05,
"loss": 1.5925,
"step": 3530
},
{
"epoch": 10.03,
"learning_rate": 2.598947368421053e-05,
"loss": 1.7235,
"step": 3540
},
{
"epoch": 10.06,
"learning_rate": 2.5973684210526317e-05,
"loss": 1.5714,
"step": 3550
},
{
"epoch": 10.08,
"learning_rate": 2.5957894736842107e-05,
"loss": 1.5669,
"step": 3560
},
{
"epoch": 10.11,
"learning_rate": 2.5942105263157894e-05,
"loss": 1.573,
"step": 3570
},
{
"epoch": 10.14,
"learning_rate": 2.5926315789473684e-05,
"loss": 1.5635,
"step": 3580
},
{
"epoch": 10.17,
"learning_rate": 2.5910526315789475e-05,
"loss": 1.5614,
"step": 3590
},
{
"epoch": 10.2,
"learning_rate": 2.5894736842105265e-05,
"loss": 1.5664,
"step": 3600
},
{
"epoch": 10.23,
"learning_rate": 2.5878947368421052e-05,
"loss": 1.5666,
"step": 3610
},
{
"epoch": 10.25,
"learning_rate": 2.5863157894736842e-05,
"loss": 1.5893,
"step": 3620
},
{
"epoch": 10.28,
"learning_rate": 2.584736842105263e-05,
"loss": 1.5843,
"step": 3630
},
{
"epoch": 10.31,
"learning_rate": 2.5831578947368423e-05,
"loss": 1.5478,
"step": 3640
},
{
"epoch": 10.34,
"learning_rate": 2.581578947368421e-05,
"loss": 1.5738,
"step": 3650
},
{
"epoch": 10.37,
"learning_rate": 2.58e-05,
"loss": 1.5638,
"step": 3660
},
{
"epoch": 10.4,
"learning_rate": 2.578421052631579e-05,
"loss": 1.5695,
"step": 3670
},
{
"epoch": 10.42,
"learning_rate": 2.576842105263158e-05,
"loss": 1.5776,
"step": 3680
},
{
"epoch": 10.45,
"learning_rate": 2.575263157894737e-05,
"loss": 1.5758,
"step": 3690
},
{
"epoch": 10.48,
"learning_rate": 2.5736842105263158e-05,
"loss": 1.5606,
"step": 3700
},
{
"epoch": 10.51,
"learning_rate": 2.572105263157895e-05,
"loss": 1.6042,
"step": 3710
},
{
"epoch": 10.54,
"learning_rate": 2.5705263157894736e-05,
"loss": 1.563,
"step": 3720
},
{
"epoch": 10.57,
"learning_rate": 2.568947368421053e-05,
"loss": 1.5776,
"step": 3730
},
{
"epoch": 10.59,
"learning_rate": 2.5673684210526316e-05,
"loss": 1.6218,
"step": 3740
},
{
"epoch": 10.62,
"learning_rate": 2.5657894736842107e-05,
"loss": 1.5918,
"step": 3750
},
{
"epoch": 10.65,
"learning_rate": 2.5642105263157894e-05,
"loss": 1.5627,
"step": 3760
},
{
"epoch": 10.68,
"learning_rate": 2.5626315789473684e-05,
"loss": 1.5722,
"step": 3770
},
{
"epoch": 10.71,
"learning_rate": 2.5610526315789474e-05,
"loss": 1.5993,
"step": 3780
},
{
"epoch": 10.73,
"learning_rate": 2.5594736842105265e-05,
"loss": 1.5965,
"step": 3790
},
{
"epoch": 10.76,
"learning_rate": 2.557894736842105e-05,
"loss": 1.5654,
"step": 3800
},
{
"epoch": 10.79,
"learning_rate": 2.5563157894736842e-05,
"loss": 1.5903,
"step": 3810
},
{
"epoch": 10.82,
"learning_rate": 2.5547368421052632e-05,
"loss": 1.5632,
"step": 3820
},
{
"epoch": 10.85,
"learning_rate": 2.5531578947368423e-05,
"loss": 1.5944,
"step": 3830
},
{
"epoch": 10.88,
"learning_rate": 2.5515789473684213e-05,
"loss": 1.5708,
"step": 3840
},
{
"epoch": 10.9,
"learning_rate": 2.55e-05,
"loss": 1.5838,
"step": 3850
},
{
"epoch": 10.93,
"learning_rate": 2.548421052631579e-05,
"loss": 1.6397,
"step": 3860
},
{
"epoch": 10.96,
"learning_rate": 2.546842105263158e-05,
"loss": 1.6055,
"step": 3870
},
{
"epoch": 10.99,
"learning_rate": 2.545263157894737e-05,
"loss": 1.6096,
"step": 3880
},
{
"epoch": 11.02,
"learning_rate": 2.5436842105263158e-05,
"loss": 1.73,
"step": 3890
},
{
"epoch": 11.05,
"learning_rate": 2.5421052631578948e-05,
"loss": 1.558,
"step": 3900
},
{
"epoch": 11.08,
"learning_rate": 2.5405263157894735e-05,
"loss": 1.6073,
"step": 3910
},
{
"epoch": 11.1,
"learning_rate": 2.538947368421053e-05,
"loss": 1.5865,
"step": 3920
},
{
"epoch": 11.13,
"learning_rate": 2.5373684210526316e-05,
"loss": 1.5615,
"step": 3930
},
{
"epoch": 11.16,
"learning_rate": 2.5357894736842106e-05,
"loss": 1.5534,
"step": 3940
},
{
"epoch": 11.19,
"learning_rate": 2.5342105263157893e-05,
"loss": 1.5786,
"step": 3950
},
{
"epoch": 11.22,
"learning_rate": 2.5326315789473687e-05,
"loss": 1.5597,
"step": 3960
},
{
"epoch": 11.25,
"learning_rate": 2.5310526315789474e-05,
"loss": 1.5256,
"step": 3970
},
{
"epoch": 11.27,
"learning_rate": 2.5294736842105264e-05,
"loss": 1.5662,
"step": 3980
},
{
"epoch": 11.3,
"learning_rate": 2.527894736842105e-05,
"loss": 1.5891,
"step": 3990
},
{
"epoch": 11.33,
"learning_rate": 2.526315789473684e-05,
"loss": 1.5694,
"step": 4000
},
{
"epoch": 11.33,
"eval_denotation_accuracy": 0.555987283645355,
"eval_loss": 2.0672969818115234,
"eval_runtime": 545.5849,
"eval_samples_per_second": 5.189,
"eval_steps_per_second": 1.298,
"step": 4000
},
{
"epoch": 11.36,
"learning_rate": 2.5247368421052635e-05,
"loss": 1.5932,
"step": 4010
},
{
"epoch": 11.39,
"learning_rate": 2.5231578947368422e-05,
"loss": 1.5728,
"step": 4020
},
{
"epoch": 11.42,
"learning_rate": 2.5215789473684212e-05,
"loss": 1.5914,
"step": 4030
},
{
"epoch": 11.44,
"learning_rate": 2.52e-05,
"loss": 1.579,
"step": 4040
},
{
"epoch": 11.47,
"learning_rate": 2.518421052631579e-05,
"loss": 1.5675,
"step": 4050
},
{
"epoch": 11.5,
"learning_rate": 2.516842105263158e-05,
"loss": 1.5588,
"step": 4060
},
{
"epoch": 11.53,
"learning_rate": 2.515263157894737e-05,
"loss": 1.5447,
"step": 4070
},
{
"epoch": 11.56,
"learning_rate": 2.5136842105263157e-05,
"loss": 1.5659,
"step": 4080
},
{
"epoch": 11.58,
"learning_rate": 2.5121052631578948e-05,
"loss": 1.5522,
"step": 4090
},
{
"epoch": 11.61,
"learning_rate": 2.5105263157894738e-05,
"loss": 1.5465,
"step": 4100
},
{
"epoch": 11.64,
"learning_rate": 2.5089473684210528e-05,
"loss": 1.5612,
"step": 4110
},
{
"epoch": 11.67,
"learning_rate": 2.5073684210526315e-05,
"loss": 1.5501,
"step": 4120
},
{
"epoch": 11.7,
"learning_rate": 2.5057894736842106e-05,
"loss": 1.5516,
"step": 4130
},
{
"epoch": 11.73,
"learning_rate": 2.5042105263157893e-05,
"loss": 1.5716,
"step": 4140
},
{
"epoch": 11.75,
"learning_rate": 2.5026315789473686e-05,
"loss": 1.5688,
"step": 4150
},
{
"epoch": 11.78,
"learning_rate": 2.5010526315789473e-05,
"loss": 1.5453,
"step": 4160
},
{
"epoch": 11.81,
"learning_rate": 2.4994736842105264e-05,
"loss": 1.5601,
"step": 4170
},
{
"epoch": 11.84,
"learning_rate": 2.4978947368421054e-05,
"loss": 1.5772,
"step": 4180
},
{
"epoch": 11.87,
"learning_rate": 2.4963157894736844e-05,
"loss": 1.573,
"step": 4190
},
{
"epoch": 11.9,
"learning_rate": 2.4947368421052635e-05,
"loss": 1.5693,
"step": 4200
},
{
"epoch": 11.92,
"learning_rate": 2.493157894736842e-05,
"loss": 1.5634,
"step": 4210
},
{
"epoch": 11.95,
"learning_rate": 2.4915789473684212e-05,
"loss": 1.5836,
"step": 4220
},
{
"epoch": 11.98,
"learning_rate": 2.49e-05,
"loss": 1.5607,
"step": 4230
},
{
"epoch": 12.01,
"learning_rate": 2.4884210526315792e-05,
"loss": 1.6897,
"step": 4240
},
{
"epoch": 12.04,
"learning_rate": 2.486842105263158e-05,
"loss": 1.5572,
"step": 4250
},
{
"epoch": 12.07,
"learning_rate": 2.485263157894737e-05,
"loss": 1.5313,
"step": 4260
},
{
"epoch": 12.1,
"learning_rate": 2.4836842105263157e-05,
"loss": 1.5374,
"step": 4270
},
{
"epoch": 12.12,
"learning_rate": 2.4821052631578947e-05,
"loss": 1.5246,
"step": 4280
},
{
"epoch": 12.15,
"learning_rate": 2.4805263157894737e-05,
"loss": 1.5319,
"step": 4290
},
{
"epoch": 12.18,
"learning_rate": 2.4789473684210528e-05,
"loss": 1.5379,
"step": 4300
},
{
"epoch": 12.21,
"learning_rate": 2.4773684210526315e-05,
"loss": 1.531,
"step": 4310
},
{
"epoch": 12.24,
"learning_rate": 2.4757894736842105e-05,
"loss": 1.5313,
"step": 4320
},
{
"epoch": 12.27,
"learning_rate": 2.4742105263157895e-05,
"loss": 1.538,
"step": 4330
},
{
"epoch": 12.29,
"learning_rate": 2.4726315789473686e-05,
"loss": 1.5354,
"step": 4340
},
{
"epoch": 12.32,
"learning_rate": 2.4710526315789476e-05,
"loss": 1.5123,
"step": 4350
},
{
"epoch": 12.35,
"learning_rate": 2.4694736842105263e-05,
"loss": 1.5324,
"step": 4360
},
{
"epoch": 12.38,
"learning_rate": 2.4678947368421053e-05,
"loss": 1.5454,
"step": 4370
},
{
"epoch": 12.41,
"learning_rate": 2.4663157894736844e-05,
"loss": 1.5264,
"step": 4380
},
{
"epoch": 12.44,
"learning_rate": 2.4647368421052634e-05,
"loss": 1.5431,
"step": 4390
},
{
"epoch": 12.46,
"learning_rate": 2.463157894736842e-05,
"loss": 1.561,
"step": 4400
},
{
"epoch": 12.49,
"learning_rate": 2.461578947368421e-05,
"loss": 1.5431,
"step": 4410
},
{
"epoch": 12.52,
"learning_rate": 2.4599999999999998e-05,
"loss": 1.5622,
"step": 4420
},
{
"epoch": 12.55,
"learning_rate": 2.4584210526315792e-05,
"loss": 1.5437,
"step": 4430
},
{
"epoch": 12.58,
"learning_rate": 2.456842105263158e-05,
"loss": 1.5525,
"step": 4440
},
{
"epoch": 12.6,
"learning_rate": 2.455263157894737e-05,
"loss": 1.5432,
"step": 4450
},
{
"epoch": 12.63,
"learning_rate": 2.4536842105263156e-05,
"loss": 1.5883,
"step": 4460
},
{
"epoch": 12.66,
"learning_rate": 2.452105263157895e-05,
"loss": 1.5411,
"step": 4470
},
{
"epoch": 12.69,
"learning_rate": 2.4505263157894737e-05,
"loss": 1.5631,
"step": 4480
},
{
"epoch": 12.72,
"learning_rate": 2.4489473684210527e-05,
"loss": 1.5579,
"step": 4490
},
{
"epoch": 12.75,
"learning_rate": 2.4473684210526318e-05,
"loss": 1.5554,
"step": 4500
},
{
"epoch": 12.77,
"learning_rate": 2.4457894736842105e-05,
"loss": 1.5325,
"step": 4510
},
{
"epoch": 12.8,
"learning_rate": 2.4442105263157898e-05,
"loss": 1.5589,
"step": 4520
},
{
"epoch": 12.83,
"learning_rate": 2.4426315789473685e-05,
"loss": 1.5583,
"step": 4530
},
{
"epoch": 12.86,
"learning_rate": 2.4410526315789476e-05,
"loss": 1.5552,
"step": 4540
},
{
"epoch": 12.89,
"learning_rate": 2.4394736842105262e-05,
"loss": 1.559,
"step": 4550
},
{
"epoch": 12.92,
"learning_rate": 2.4378947368421053e-05,
"loss": 1.5267,
"step": 4560
},
{
"epoch": 12.94,
"learning_rate": 2.4363157894736843e-05,
"loss": 1.5576,
"step": 4570
},
{
"epoch": 12.97,
"learning_rate": 2.4347368421052633e-05,
"loss": 1.5553,
"step": 4580
},
{
"epoch": 13.0,
"learning_rate": 2.433157894736842e-05,
"loss": 1.6593,
"step": 4590
},
{
"epoch": 13.03,
"learning_rate": 2.431578947368421e-05,
"loss": 1.5177,
"step": 4600
},
{
"epoch": 13.06,
"learning_rate": 2.43e-05,
"loss": 1.5141,
"step": 4610
},
{
"epoch": 13.09,
"learning_rate": 2.428421052631579e-05,
"loss": 1.5269,
"step": 4620
},
{
"epoch": 13.12,
"learning_rate": 2.426842105263158e-05,
"loss": 1.5197,
"step": 4630
},
{
"epoch": 13.14,
"learning_rate": 2.425263157894737e-05,
"loss": 1.5164,
"step": 4640
},
{
"epoch": 13.17,
"learning_rate": 2.4236842105263156e-05,
"loss": 1.5322,
"step": 4650
},
{
"epoch": 13.2,
"learning_rate": 2.422105263157895e-05,
"loss": 1.5146,
"step": 4660
},
{
"epoch": 13.23,
"learning_rate": 2.420526315789474e-05,
"loss": 1.5173,
"step": 4670
},
{
"epoch": 13.26,
"learning_rate": 2.4189473684210527e-05,
"loss": 1.548,
"step": 4680
},
{
"epoch": 13.29,
"learning_rate": 2.4173684210526317e-05,
"loss": 1.521,
"step": 4690
},
{
"epoch": 13.31,
"learning_rate": 2.4157894736842104e-05,
"loss": 1.5316,
"step": 4700
},
{
"epoch": 13.34,
"learning_rate": 2.4142105263157898e-05,
"loss": 1.508,
"step": 4710
},
{
"epoch": 13.37,
"learning_rate": 2.4126315789473685e-05,
"loss": 1.5215,
"step": 4720
},
{
"epoch": 13.4,
"learning_rate": 2.4110526315789475e-05,
"loss": 1.515,
"step": 4730
},
{
"epoch": 13.43,
"learning_rate": 2.4094736842105262e-05,
"loss": 1.5275,
"step": 4740
},
{
"epoch": 13.45,
"learning_rate": 2.4078947368421056e-05,
"loss": 1.5286,
"step": 4750
},
{
"epoch": 13.48,
"learning_rate": 2.4063157894736843e-05,
"loss": 1.5315,
"step": 4760
},
{
"epoch": 13.51,
"learning_rate": 2.4047368421052633e-05,
"loss": 1.5211,
"step": 4770
},
{
"epoch": 13.54,
"learning_rate": 2.403157894736842e-05,
"loss": 1.5282,
"step": 4780
},
{
"epoch": 13.57,
"learning_rate": 2.401578947368421e-05,
"loss": 1.5341,
"step": 4790
},
{
"epoch": 13.6,
"learning_rate": 2.4e-05,
"loss": 1.5497,
"step": 4800
},
{
"epoch": 13.62,
"learning_rate": 2.398421052631579e-05,
"loss": 1.5217,
"step": 4810
},
{
"epoch": 13.65,
"learning_rate": 2.3968421052631578e-05,
"loss": 1.5514,
"step": 4820
},
{
"epoch": 13.68,
"learning_rate": 2.3952631578947368e-05,
"loss": 1.5398,
"step": 4830
},
{
"epoch": 13.71,
"learning_rate": 2.393684210526316e-05,
"loss": 1.5226,
"step": 4840
},
{
"epoch": 13.74,
"learning_rate": 2.392105263157895e-05,
"loss": 1.5465,
"step": 4850
},
{
"epoch": 13.77,
"learning_rate": 2.390526315789474e-05,
"loss": 1.5332,
"step": 4860
},
{
"epoch": 13.79,
"learning_rate": 2.3889473684210526e-05,
"loss": 1.5446,
"step": 4870
},
{
"epoch": 13.82,
"learning_rate": 2.3873684210526317e-05,
"loss": 1.5414,
"step": 4880
},
{
"epoch": 13.85,
"learning_rate": 2.3857894736842107e-05,
"loss": 1.5127,
"step": 4890
},
{
"epoch": 13.88,
"learning_rate": 2.3842105263157897e-05,
"loss": 1.5044,
"step": 4900
},
{
"epoch": 13.91,
"learning_rate": 2.3826315789473684e-05,
"loss": 1.5433,
"step": 4910
},
{
"epoch": 13.94,
"learning_rate": 2.3810526315789475e-05,
"loss": 1.5447,
"step": 4920
},
{
"epoch": 13.96,
"learning_rate": 2.379473684210526e-05,
"loss": 1.585,
"step": 4930
},
{
"epoch": 13.99,
"learning_rate": 2.3778947368421055e-05,
"loss": 1.5463,
"step": 4940
},
{
"epoch": 14.02,
"learning_rate": 2.3763157894736842e-05,
"loss": 1.6538,
"step": 4950
},
{
"epoch": 14.05,
"learning_rate": 2.3747368421052632e-05,
"loss": 1.5099,
"step": 4960
},
{
"epoch": 14.08,
"learning_rate": 2.373157894736842e-05,
"loss": 1.5006,
"step": 4970
},
{
"epoch": 14.11,
"learning_rate": 2.371578947368421e-05,
"loss": 1.5441,
"step": 4980
},
{
"epoch": 14.14,
"learning_rate": 2.37e-05,
"loss": 1.5238,
"step": 4990
},
{
"epoch": 14.16,
"learning_rate": 2.368421052631579e-05,
"loss": 1.5332,
"step": 5000
},
{
"epoch": 14.16,
"eval_denotation_accuracy": 0.5588131402331332,
"eval_loss": 2.120304584503174,
"eval_runtime": 363.847,
"eval_samples_per_second": 7.781,
"eval_steps_per_second": 1.946,
"step": 5000
},
{
"epoch": 14.19,
"learning_rate": 2.366842105263158e-05,
"loss": 1.5345,
"step": 5010
},
{
"epoch": 14.22,
"learning_rate": 2.3652631578947368e-05,
"loss": 1.5463,
"step": 5020
},
{
"epoch": 14.25,
"learning_rate": 2.363684210526316e-05,
"loss": 1.5225,
"step": 5030
},
{
"epoch": 14.28,
"learning_rate": 2.362105263157895e-05,
"loss": 1.5379,
"step": 5040
},
{
"epoch": 14.31,
"learning_rate": 2.360526315789474e-05,
"loss": 1.5255,
"step": 5050
},
{
"epoch": 14.33,
"learning_rate": 2.3589473684210526e-05,
"loss": 1.524,
"step": 5060
},
{
"epoch": 14.36,
"learning_rate": 2.3573684210526316e-05,
"loss": 1.5206,
"step": 5070
},
{
"epoch": 14.39,
"learning_rate": 2.3557894736842106e-05,
"loss": 1.5192,
"step": 5080
},
{
"epoch": 14.42,
"learning_rate": 2.3542105263157897e-05,
"loss": 1.5225,
"step": 5090
},
{
"epoch": 14.45,
"learning_rate": 2.3526315789473684e-05,
"loss": 1.5119,
"step": 5100
},
{
"epoch": 14.47,
"learning_rate": 2.3510526315789474e-05,
"loss": 1.5378,
"step": 5110
},
{
"epoch": 14.5,
"learning_rate": 2.349473684210526e-05,
"loss": 1.5179,
"step": 5120
},
{
"epoch": 14.53,
"learning_rate": 2.3478947368421055e-05,
"loss": 1.5317,
"step": 5130
},
{
"epoch": 14.56,
"learning_rate": 2.346315789473684e-05,
"loss": 1.514,
"step": 5140
},
{
"epoch": 14.59,
"learning_rate": 2.3447368421052632e-05,
"loss": 1.5168,
"step": 5150
},
{
"epoch": 14.62,
"learning_rate": 2.343157894736842e-05,
"loss": 1.4988,
"step": 5160
},
{
"epoch": 14.64,
"learning_rate": 2.3415789473684213e-05,
"loss": 1.5204,
"step": 5170
},
{
"epoch": 14.67,
"learning_rate": 2.3400000000000003e-05,
"loss": 1.5065,
"step": 5180
},
{
"epoch": 14.7,
"learning_rate": 2.338421052631579e-05,
"loss": 1.509,
"step": 5190
},
{
"epoch": 14.73,
"learning_rate": 2.336842105263158e-05,
"loss": 1.5171,
"step": 5200
},
{
"epoch": 14.76,
"learning_rate": 2.3352631578947367e-05,
"loss": 1.5005,
"step": 5210
},
{
"epoch": 14.79,
"learning_rate": 2.333684210526316e-05,
"loss": 1.5134,
"step": 5220
},
{
"epoch": 14.81,
"learning_rate": 2.3321052631578948e-05,
"loss": 1.4947,
"step": 5230
},
{
"epoch": 14.84,
"learning_rate": 2.3305263157894738e-05,
"loss": 1.5001,
"step": 5240
},
{
"epoch": 14.87,
"learning_rate": 2.3289473684210525e-05,
"loss": 1.5067,
"step": 5250
},
{
"epoch": 14.9,
"learning_rate": 2.3273684210526316e-05,
"loss": 1.4931,
"step": 5260
},
{
"epoch": 14.93,
"learning_rate": 2.3257894736842106e-05,
"loss": 1.4966,
"step": 5270
},
{
"epoch": 14.96,
"learning_rate": 2.3242105263157896e-05,
"loss": 1.5149,
"step": 5280
},
{
"epoch": 14.98,
"learning_rate": 2.3226315789473683e-05,
"loss": 1.5046,
"step": 5290
},
{
"epoch": 15.01,
"learning_rate": 2.3210526315789473e-05,
"loss": 1.6353,
"step": 5300
},
{
"epoch": 15.04,
"learning_rate": 2.3194736842105264e-05,
"loss": 1.505,
"step": 5310
},
{
"epoch": 15.07,
"learning_rate": 2.3178947368421054e-05,
"loss": 1.5005,
"step": 5320
},
{
"epoch": 15.1,
"learning_rate": 2.316315789473684e-05,
"loss": 1.5189,
"step": 5330
},
{
"epoch": 15.13,
"learning_rate": 2.314736842105263e-05,
"loss": 1.4973,
"step": 5340
},
{
"epoch": 15.16,
"learning_rate": 2.3131578947368422e-05,
"loss": 1.5069,
"step": 5350
},
{
"epoch": 15.18,
"learning_rate": 2.3115789473684212e-05,
"loss": 1.5069,
"step": 5360
},
{
"epoch": 15.21,
"learning_rate": 2.3100000000000002e-05,
"loss": 1.5092,
"step": 5370
},
{
"epoch": 15.24,
"learning_rate": 2.308421052631579e-05,
"loss": 1.5012,
"step": 5380
},
{
"epoch": 15.27,
"learning_rate": 2.306842105263158e-05,
"loss": 1.5132,
"step": 5390
},
{
"epoch": 15.3,
"learning_rate": 2.3052631578947367e-05,
"loss": 1.5186,
"step": 5400
},
{
"epoch": 15.32,
"learning_rate": 2.303684210526316e-05,
"loss": 1.5145,
"step": 5410
},
{
"epoch": 15.35,
"learning_rate": 2.3021052631578947e-05,
"loss": 1.5089,
"step": 5420
},
{
"epoch": 15.38,
"learning_rate": 2.3005263157894738e-05,
"loss": 1.4856,
"step": 5430
},
{
"epoch": 15.41,
"learning_rate": 2.2989473684210525e-05,
"loss": 1.5063,
"step": 5440
},
{
"epoch": 15.44,
"learning_rate": 2.297368421052632e-05,
"loss": 1.5018,
"step": 5450
},
{
"epoch": 15.47,
"learning_rate": 2.2957894736842105e-05,
"loss": 1.5089,
"step": 5460
},
{
"epoch": 15.49,
"learning_rate": 2.2942105263157896e-05,
"loss": 1.5036,
"step": 5470
},
{
"epoch": 15.52,
"learning_rate": 2.2926315789473683e-05,
"loss": 1.517,
"step": 5480
},
{
"epoch": 15.55,
"learning_rate": 2.2910526315789473e-05,
"loss": 1.5061,
"step": 5490
},
{
"epoch": 15.58,
"learning_rate": 2.2894736842105263e-05,
"loss": 1.4994,
"step": 5500
},
{
"epoch": 15.61,
"learning_rate": 2.2878947368421054e-05,
"loss": 1.5256,
"step": 5510
},
{
"epoch": 15.64,
"learning_rate": 2.2863157894736844e-05,
"loss": 1.5177,
"step": 5520
},
{
"epoch": 15.66,
"learning_rate": 2.284736842105263e-05,
"loss": 1.5047,
"step": 5530
},
{
"epoch": 15.69,
"learning_rate": 2.283157894736842e-05,
"loss": 1.5303,
"step": 5540
},
{
"epoch": 15.72,
"learning_rate": 2.281578947368421e-05,
"loss": 1.5366,
"step": 5550
},
{
"epoch": 15.75,
"learning_rate": 2.2800000000000002e-05,
"loss": 1.5397,
"step": 5560
},
{
"epoch": 15.78,
"learning_rate": 2.278421052631579e-05,
"loss": 1.5016,
"step": 5570
},
{
"epoch": 15.81,
"learning_rate": 2.276842105263158e-05,
"loss": 1.51,
"step": 5580
},
{
"epoch": 15.83,
"learning_rate": 2.275263157894737e-05,
"loss": 1.5091,
"step": 5590
},
{
"epoch": 15.86,
"learning_rate": 2.273684210526316e-05,
"loss": 1.5116,
"step": 5600
},
{
"epoch": 15.89,
"learning_rate": 2.2721052631578947e-05,
"loss": 1.503,
"step": 5610
},
{
"epoch": 15.92,
"learning_rate": 2.2705263157894737e-05,
"loss": 1.5021,
"step": 5620
},
{
"epoch": 15.95,
"learning_rate": 2.2689473684210524e-05,
"loss": 1.5252,
"step": 5630
},
{
"epoch": 15.97,
"learning_rate": 2.2673684210526318e-05,
"loss": 1.5087,
"step": 5640
},
{
"epoch": 16.01,
"learning_rate": 2.2657894736842105e-05,
"loss": 1.6352,
"step": 5650
},
{
"epoch": 16.03,
"learning_rate": 2.2642105263157895e-05,
"loss": 1.4857,
"step": 5660
},
{
"epoch": 16.06,
"learning_rate": 2.2626315789473685e-05,
"loss": 1.5041,
"step": 5670
},
{
"epoch": 16.09,
"learning_rate": 2.2610526315789472e-05,
"loss": 1.544,
"step": 5680
},
{
"epoch": 16.12,
"learning_rate": 2.2594736842105266e-05,
"loss": 1.5226,
"step": 5690
},
{
"epoch": 16.15,
"learning_rate": 2.2578947368421053e-05,
"loss": 1.5118,
"step": 5700
},
{
"epoch": 16.18,
"learning_rate": 2.2563157894736843e-05,
"loss": 1.5095,
"step": 5710
},
{
"epoch": 16.2,
"learning_rate": 2.254736842105263e-05,
"loss": 1.5032,
"step": 5720
},
{
"epoch": 16.23,
"learning_rate": 2.2531578947368424e-05,
"loss": 1.4881,
"step": 5730
},
{
"epoch": 16.26,
"learning_rate": 2.251578947368421e-05,
"loss": 1.4976,
"step": 5740
},
{
"epoch": 16.29,
"learning_rate": 2.25e-05,
"loss": 1.5119,
"step": 5750
},
{
"epoch": 16.32,
"learning_rate": 2.248421052631579e-05,
"loss": 1.5084,
"step": 5760
},
{
"epoch": 16.34,
"learning_rate": 2.246842105263158e-05,
"loss": 1.5138,
"step": 5770
},
{
"epoch": 16.37,
"learning_rate": 2.245263157894737e-05,
"loss": 1.4983,
"step": 5780
},
{
"epoch": 16.4,
"learning_rate": 2.243684210526316e-05,
"loss": 1.4821,
"step": 5790
},
{
"epoch": 16.43,
"learning_rate": 2.2421052631578946e-05,
"loss": 1.5076,
"step": 5800
},
{
"epoch": 16.46,
"learning_rate": 2.2405263157894737e-05,
"loss": 1.4939,
"step": 5810
},
{
"epoch": 16.49,
"learning_rate": 2.2389473684210527e-05,
"loss": 1.4924,
"step": 5820
},
{
"epoch": 16.51,
"learning_rate": 2.2373684210526317e-05,
"loss": 1.4835,
"step": 5830
},
{
"epoch": 16.54,
"learning_rate": 2.2357894736842108e-05,
"loss": 1.4857,
"step": 5840
},
{
"epoch": 16.57,
"learning_rate": 2.2342105263157895e-05,
"loss": 1.5067,
"step": 5850
},
{
"epoch": 16.6,
"learning_rate": 2.2326315789473685e-05,
"loss": 1.5082,
"step": 5860
},
{
"epoch": 16.63,
"learning_rate": 2.2310526315789475e-05,
"loss": 1.5156,
"step": 5870
},
{
"epoch": 16.66,
"learning_rate": 2.2294736842105266e-05,
"loss": 1.5161,
"step": 5880
},
{
"epoch": 16.68,
"learning_rate": 2.2278947368421053e-05,
"loss": 1.5042,
"step": 5890
},
{
"epoch": 16.71,
"learning_rate": 2.2263157894736843e-05,
"loss": 1.5196,
"step": 5900
},
{
"epoch": 16.74,
"learning_rate": 2.224736842105263e-05,
"loss": 1.5198,
"step": 5910
},
{
"epoch": 16.77,
"learning_rate": 2.2231578947368424e-05,
"loss": 1.5017,
"step": 5920
},
{
"epoch": 16.8,
"learning_rate": 2.221578947368421e-05,
"loss": 1.5016,
"step": 5930
},
{
"epoch": 16.83,
"learning_rate": 2.22e-05,
"loss": 1.487,
"step": 5940
},
{
"epoch": 16.85,
"learning_rate": 2.2184210526315788e-05,
"loss": 1.5135,
"step": 5950
},
{
"epoch": 16.88,
"learning_rate": 2.216842105263158e-05,
"loss": 1.4875,
"step": 5960
},
{
"epoch": 16.91,
"learning_rate": 2.215263157894737e-05,
"loss": 1.5154,
"step": 5970
},
{
"epoch": 16.94,
"learning_rate": 2.213684210526316e-05,
"loss": 1.5004,
"step": 5980
},
{
"epoch": 16.97,
"learning_rate": 2.2121052631578946e-05,
"loss": 1.4995,
"step": 5990
},
{
"epoch": 16.99,
"learning_rate": 2.2105263157894736e-05,
"loss": 1.4987,
"step": 6000
},
{
"epoch": 16.99,
"eval_denotation_accuracy": 0.5732956552454963,
"eval_loss": 2.120565891265869,
"eval_runtime": 374.0788,
"eval_samples_per_second": 7.568,
"eval_steps_per_second": 1.893,
"step": 6000
},
{
"epoch": 17.03,
"learning_rate": 2.208947368421053e-05,
"loss": 1.6182,
"step": 6010
},
{
"epoch": 17.05,
"learning_rate": 2.2073684210526317e-05,
"loss": 1.494,
"step": 6020
},
{
"epoch": 17.08,
"learning_rate": 2.2057894736842107e-05,
"loss": 1.4837,
"step": 6030
},
{
"epoch": 17.11,
"learning_rate": 2.2042105263157894e-05,
"loss": 1.4926,
"step": 6040
},
{
"epoch": 17.14,
"learning_rate": 2.2026315789473684e-05,
"loss": 1.494,
"step": 6050
},
{
"epoch": 17.17,
"learning_rate": 2.2010526315789475e-05,
"loss": 1.4765,
"step": 6060
},
{
"epoch": 17.19,
"learning_rate": 2.1994736842105265e-05,
"loss": 1.4961,
"step": 6070
},
{
"epoch": 17.22,
"learning_rate": 2.1978947368421052e-05,
"loss": 1.4857,
"step": 6080
},
{
"epoch": 17.25,
"learning_rate": 2.1963157894736842e-05,
"loss": 1.5008,
"step": 6090
},
{
"epoch": 17.28,
"learning_rate": 2.1947368421052633e-05,
"loss": 1.4987,
"step": 6100
},
{
"epoch": 17.31,
"learning_rate": 2.1931578947368423e-05,
"loss": 1.4967,
"step": 6110
},
{
"epoch": 17.34,
"learning_rate": 2.191578947368421e-05,
"loss": 1.4939,
"step": 6120
},
{
"epoch": 17.36,
"learning_rate": 2.19e-05,
"loss": 1.4838,
"step": 6130
},
{
"epoch": 17.39,
"learning_rate": 2.1884210526315787e-05,
"loss": 1.5048,
"step": 6140
},
{
"epoch": 17.42,
"learning_rate": 2.186842105263158e-05,
"loss": 1.494,
"step": 6150
},
{
"epoch": 17.45,
"learning_rate": 2.1852631578947368e-05,
"loss": 1.5121,
"step": 6160
},
{
"epoch": 17.48,
"learning_rate": 2.183684210526316e-05,
"loss": 1.5,
"step": 6170
},
{
"epoch": 17.51,
"learning_rate": 2.182105263157895e-05,
"loss": 1.4775,
"step": 6180
},
{
"epoch": 17.53,
"learning_rate": 2.1805263157894736e-05,
"loss": 1.4803,
"step": 6190
},
{
"epoch": 17.56,
"learning_rate": 2.178947368421053e-05,
"loss": 1.4727,
"step": 6200
},
{
"epoch": 17.59,
"learning_rate": 2.1773684210526316e-05,
"loss": 1.4691,
"step": 6210
},
{
"epoch": 17.62,
"learning_rate": 2.1757894736842107e-05,
"loss": 1.4769,
"step": 6220
},
{
"epoch": 17.65,
"learning_rate": 2.1742105263157894e-05,
"loss": 1.4741,
"step": 6230
},
{
"epoch": 17.68,
"learning_rate": 2.1726315789473687e-05,
"loss": 1.4794,
"step": 6240
},
{
"epoch": 17.7,
"learning_rate": 2.1710526315789474e-05,
"loss": 1.4919,
"step": 6250
},
{
"epoch": 17.73,
"learning_rate": 2.1694736842105265e-05,
"loss": 1.5191,
"step": 6260
},
{
"epoch": 17.76,
"learning_rate": 2.167894736842105e-05,
"loss": 1.4839,
"step": 6270
},
{
"epoch": 17.79,
"learning_rate": 2.1663157894736842e-05,
"loss": 1.5003,
"step": 6280
},
{
"epoch": 17.82,
"learning_rate": 2.1647368421052632e-05,
"loss": 1.4856,
"step": 6290
},
{
"epoch": 17.84,
"learning_rate": 2.1631578947368423e-05,
"loss": 1.5092,
"step": 6300
},
{
"epoch": 17.87,
"learning_rate": 2.161578947368421e-05,
"loss": 1.4915,
"step": 6310
},
{
"epoch": 17.9,
"learning_rate": 2.16e-05,
"loss": 1.5019,
"step": 6320
},
{
"epoch": 17.93,
"learning_rate": 2.1584210526315787e-05,
"loss": 1.4821,
"step": 6330
},
{
"epoch": 17.96,
"learning_rate": 2.156842105263158e-05,
"loss": 1.4982,
"step": 6340
},
{
"epoch": 17.99,
"learning_rate": 2.155263157894737e-05,
"loss": 1.485,
"step": 6350
},
{
"epoch": 18.02,
"learning_rate": 2.1536842105263158e-05,
"loss": 1.6133,
"step": 6360
},
{
"epoch": 18.05,
"learning_rate": 2.1521052631578948e-05,
"loss": 1.484,
"step": 6370
},
{
"epoch": 18.07,
"learning_rate": 2.150526315789474e-05,
"loss": 1.4784,
"step": 6380
},
{
"epoch": 18.1,
"learning_rate": 2.148947368421053e-05,
"loss": 1.4662,
"step": 6390
},
{
"epoch": 18.13,
"learning_rate": 2.1473684210526316e-05,
"loss": 1.4904,
"step": 6400
},
{
"epoch": 18.16,
"learning_rate": 2.1457894736842106e-05,
"loss": 1.4876,
"step": 6410
},
{
"epoch": 18.19,
"learning_rate": 2.1442105263157893e-05,
"loss": 1.5111,
"step": 6420
},
{
"epoch": 18.21,
"learning_rate": 2.1426315789473687e-05,
"loss": 1.4777,
"step": 6430
},
{
"epoch": 18.24,
"learning_rate": 2.1410526315789474e-05,
"loss": 1.4912,
"step": 6440
},
{
"epoch": 18.27,
"learning_rate": 2.1394736842105264e-05,
"loss": 1.4765,
"step": 6450
},
{
"epoch": 18.3,
"learning_rate": 2.137894736842105e-05,
"loss": 1.4948,
"step": 6460
},
{
"epoch": 18.33,
"learning_rate": 2.136315789473684e-05,
"loss": 1.475,
"step": 6470
},
{
"epoch": 18.36,
"learning_rate": 2.1347368421052632e-05,
"loss": 1.4844,
"step": 6480
},
{
"epoch": 18.38,
"learning_rate": 2.1331578947368422e-05,
"loss": 1.4933,
"step": 6490
},
{
"epoch": 18.41,
"learning_rate": 2.1315789473684212e-05,
"loss": 1.4813,
"step": 6500
},
{
"epoch": 18.44,
"learning_rate": 2.13e-05,
"loss": 1.4783,
"step": 6510
},
{
"epoch": 18.47,
"learning_rate": 2.1284210526315793e-05,
"loss": 1.4781,
"step": 6520
},
{
"epoch": 18.5,
"learning_rate": 2.126842105263158e-05,
"loss": 1.4926,
"step": 6530
},
{
"epoch": 18.53,
"learning_rate": 2.125263157894737e-05,
"loss": 1.4912,
"step": 6540
},
{
"epoch": 18.55,
"learning_rate": 2.1236842105263157e-05,
"loss": 1.4714,
"step": 6550
},
{
"epoch": 18.58,
"learning_rate": 2.1221052631578948e-05,
"loss": 1.4949,
"step": 6560
},
{
"epoch": 18.61,
"learning_rate": 2.1205263157894738e-05,
"loss": 1.4994,
"step": 6570
},
{
"epoch": 18.64,
"learning_rate": 2.118947368421053e-05,
"loss": 1.4855,
"step": 6580
},
{
"epoch": 18.67,
"learning_rate": 2.1173684210526315e-05,
"loss": 1.4913,
"step": 6590
},
{
"epoch": 18.7,
"learning_rate": 2.1157894736842106e-05,
"loss": 1.4812,
"step": 6600
},
{
"epoch": 18.72,
"learning_rate": 2.1142105263157893e-05,
"loss": 1.4969,
"step": 6610
},
{
"epoch": 18.75,
"learning_rate": 2.1126315789473686e-05,
"loss": 1.4852,
"step": 6620
},
{
"epoch": 18.78,
"learning_rate": 2.1110526315789473e-05,
"loss": 1.4857,
"step": 6630
},
{
"epoch": 18.81,
"learning_rate": 2.1094736842105264e-05,
"loss": 1.4952,
"step": 6640
},
{
"epoch": 18.84,
"learning_rate": 2.107894736842105e-05,
"loss": 1.4932,
"step": 6650
},
{
"epoch": 18.86,
"learning_rate": 2.1063157894736844e-05,
"loss": 1.4755,
"step": 6660
},
{
"epoch": 18.89,
"learning_rate": 2.1047368421052635e-05,
"loss": 1.4817,
"step": 6670
},
{
"epoch": 18.92,
"learning_rate": 2.103157894736842e-05,
"loss": 1.4837,
"step": 6680
},
{
"epoch": 18.95,
"learning_rate": 2.1015789473684212e-05,
"loss": 1.4852,
"step": 6690
},
{
"epoch": 18.98,
"learning_rate": 2.1e-05,
"loss": 1.4877,
"step": 6700
},
{
"epoch": 19.01,
"learning_rate": 2.0984210526315793e-05,
"loss": 1.6275,
"step": 6710
},
{
"epoch": 19.04,
"learning_rate": 2.096842105263158e-05,
"loss": 1.4863,
"step": 6720
},
{
"epoch": 19.06,
"learning_rate": 2.095263157894737e-05,
"loss": 1.4741,
"step": 6730
},
{
"epoch": 19.09,
"learning_rate": 2.0936842105263157e-05,
"loss": 1.4807,
"step": 6740
},
{
"epoch": 19.12,
"learning_rate": 2.0921052631578947e-05,
"loss": 1.4751,
"step": 6750
},
{
"epoch": 19.15,
"learning_rate": 2.0905263157894737e-05,
"loss": 1.4796,
"step": 6760
},
{
"epoch": 19.18,
"learning_rate": 2.0889473684210528e-05,
"loss": 1.4745,
"step": 6770
},
{
"epoch": 19.21,
"learning_rate": 2.0873684210526315e-05,
"loss": 1.4767,
"step": 6780
},
{
"epoch": 19.23,
"learning_rate": 2.0857894736842105e-05,
"loss": 1.4831,
"step": 6790
},
{
"epoch": 19.26,
"learning_rate": 2.0842105263157895e-05,
"loss": 1.4649,
"step": 6800
},
{
"epoch": 19.29,
"learning_rate": 2.0826315789473686e-05,
"loss": 1.4696,
"step": 6810
},
{
"epoch": 19.32,
"learning_rate": 2.0810526315789473e-05,
"loss": 1.472,
"step": 6820
},
{
"epoch": 19.35,
"learning_rate": 2.0794736842105263e-05,
"loss": 1.4631,
"step": 6830
},
{
"epoch": 19.38,
"learning_rate": 2.0778947368421053e-05,
"loss": 1.4755,
"step": 6840
},
{
"epoch": 19.4,
"learning_rate": 2.0763157894736844e-05,
"loss": 1.483,
"step": 6850
},
{
"epoch": 19.43,
"learning_rate": 2.0747368421052634e-05,
"loss": 1.4855,
"step": 6860
},
{
"epoch": 19.46,
"learning_rate": 2.073157894736842e-05,
"loss": 1.476,
"step": 6870
},
{
"epoch": 19.49,
"learning_rate": 2.071578947368421e-05,
"loss": 1.4683,
"step": 6880
},
{
"epoch": 19.52,
"learning_rate": 2.07e-05,
"loss": 1.4768,
"step": 6890
},
{
"epoch": 19.55,
"learning_rate": 2.0684210526315792e-05,
"loss": 1.4775,
"step": 6900
},
{
"epoch": 19.57,
"learning_rate": 2.066842105263158e-05,
"loss": 1.476,
"step": 6910
},
{
"epoch": 19.6,
"learning_rate": 2.065263157894737e-05,
"loss": 1.4981,
"step": 6920
},
{
"epoch": 19.63,
"learning_rate": 2.0636842105263156e-05,
"loss": 1.4777,
"step": 6930
},
{
"epoch": 19.66,
"learning_rate": 2.062105263157895e-05,
"loss": 1.4649,
"step": 6940
},
{
"epoch": 19.69,
"learning_rate": 2.0605263157894737e-05,
"loss": 1.4702,
"step": 6950
},
{
"epoch": 19.71,
"learning_rate": 2.0589473684210527e-05,
"loss": 1.4714,
"step": 6960
},
{
"epoch": 19.74,
"learning_rate": 2.0573684210526314e-05,
"loss": 1.4763,
"step": 6970
},
{
"epoch": 19.77,
"learning_rate": 2.0557894736842105e-05,
"loss": 1.4748,
"step": 6980
},
{
"epoch": 19.8,
"learning_rate": 2.0542105263157895e-05,
"loss": 1.4655,
"step": 6990
},
{
"epoch": 19.83,
"learning_rate": 2.0526315789473685e-05,
"loss": 1.4764,
"step": 7000
},
{
"epoch": 19.83,
"eval_denotation_accuracy": 0.5810667608618862,
"eval_loss": 2.136824607849121,
"eval_runtime": 369.0602,
"eval_samples_per_second": 7.671,
"eval_steps_per_second": 1.918,
"step": 7000
},
{
"epoch": 19.86,
"learning_rate": 2.0510526315789476e-05,
"loss": 1.4886,
"step": 7010
},
{
"epoch": 19.88,
"learning_rate": 2.0494736842105263e-05,
"loss": 1.4839,
"step": 7020
},
{
"epoch": 19.91,
"learning_rate": 2.0478947368421053e-05,
"loss": 1.4785,
"step": 7030
},
{
"epoch": 19.94,
"learning_rate": 2.0463157894736843e-05,
"loss": 1.485,
"step": 7040
},
{
"epoch": 19.97,
"learning_rate": 2.0447368421052634e-05,
"loss": 1.4685,
"step": 7050
},
{
"epoch": 20.0,
"learning_rate": 2.043157894736842e-05,
"loss": 1.4821,
"step": 7060
},
{
"epoch": 20.03,
"learning_rate": 2.041578947368421e-05,
"loss": 1.6062,
"step": 7070
},
{
"epoch": 20.06,
"learning_rate": 2.04e-05,
"loss": 1.4742,
"step": 7080
},
{
"epoch": 20.08,
"learning_rate": 2.038421052631579e-05,
"loss": 1.4791,
"step": 7090
},
{
"epoch": 20.11,
"learning_rate": 2.036842105263158e-05,
"loss": 1.4936,
"step": 7100
},
{
"epoch": 20.14,
"learning_rate": 2.035263157894737e-05,
"loss": 1.4764,
"step": 7110
},
{
"epoch": 20.17,
"learning_rate": 2.0336842105263156e-05,
"loss": 1.4798,
"step": 7120
},
{
"epoch": 20.2,
"learning_rate": 2.032105263157895e-05,
"loss": 1.4762,
"step": 7130
},
{
"epoch": 20.23,
"learning_rate": 2.0305263157894736e-05,
"loss": 1.4768,
"step": 7140
},
{
"epoch": 20.25,
"learning_rate": 2.0289473684210527e-05,
"loss": 1.4949,
"step": 7150
},
{
"epoch": 20.28,
"learning_rate": 2.0273684210526314e-05,
"loss": 1.4789,
"step": 7160
},
{
"epoch": 20.31,
"learning_rate": 2.0257894736842104e-05,
"loss": 1.4794,
"step": 7170
},
{
"epoch": 20.34,
"learning_rate": 2.0242105263157898e-05,
"loss": 1.4721,
"step": 7180
},
{
"epoch": 20.37,
"learning_rate": 2.0226315789473685e-05,
"loss": 1.4656,
"step": 7190
},
{
"epoch": 20.4,
"learning_rate": 2.0210526315789475e-05,
"loss": 1.4754,
"step": 7200
},
{
"epoch": 20.42,
"learning_rate": 2.0194736842105262e-05,
"loss": 1.4671,
"step": 7210
},
{
"epoch": 20.45,
"learning_rate": 2.0178947368421056e-05,
"loss": 1.462,
"step": 7220
},
{
"epoch": 20.48,
"learning_rate": 2.0163157894736843e-05,
"loss": 1.4633,
"step": 7230
},
{
"epoch": 20.51,
"learning_rate": 2.0147368421052633e-05,
"loss": 1.4769,
"step": 7240
},
{
"epoch": 20.54,
"learning_rate": 2.013157894736842e-05,
"loss": 1.5024,
"step": 7250
},
{
"epoch": 20.57,
"learning_rate": 2.011578947368421e-05,
"loss": 1.4809,
"step": 7260
},
{
"epoch": 20.59,
"learning_rate": 2.01e-05,
"loss": 1.4737,
"step": 7270
},
{
"epoch": 20.62,
"learning_rate": 2.008421052631579e-05,
"loss": 1.4723,
"step": 7280
},
{
"epoch": 20.65,
"learning_rate": 2.0068421052631578e-05,
"loss": 1.4826,
"step": 7290
},
{
"epoch": 20.68,
"learning_rate": 2.0052631578947368e-05,
"loss": 1.4702,
"step": 7300
},
{
"epoch": 20.71,
"learning_rate": 2.0036842105263155e-05,
"loss": 1.4806,
"step": 7310
},
{
"epoch": 20.73,
"learning_rate": 2.002105263157895e-05,
"loss": 1.4684,
"step": 7320
},
{
"epoch": 20.76,
"learning_rate": 2.0005263157894736e-05,
"loss": 1.4692,
"step": 7330
},
{
"epoch": 20.79,
"learning_rate": 1.9989473684210526e-05,
"loss": 1.4731,
"step": 7340
},
{
"epoch": 20.82,
"learning_rate": 1.9973684210526317e-05,
"loss": 1.4706,
"step": 7350
},
{
"epoch": 20.85,
"learning_rate": 1.9957894736842107e-05,
"loss": 1.4721,
"step": 7360
},
{
"epoch": 20.88,
"learning_rate": 1.9942105263157897e-05,
"loss": 1.4863,
"step": 7370
},
{
"epoch": 20.9,
"learning_rate": 1.9926315789473684e-05,
"loss": 1.4697,
"step": 7380
},
{
"epoch": 20.93,
"learning_rate": 1.9910526315789475e-05,
"loss": 1.4793,
"step": 7390
},
{
"epoch": 20.96,
"learning_rate": 1.989473684210526e-05,
"loss": 1.4908,
"step": 7400
},
{
"epoch": 20.99,
"learning_rate": 1.9878947368421055e-05,
"loss": 1.4859,
"step": 7410
},
{
"epoch": 21.02,
"learning_rate": 1.9863157894736842e-05,
"loss": 1.5996,
"step": 7420
},
{
"epoch": 21.05,
"learning_rate": 1.9847368421052633e-05,
"loss": 1.4811,
"step": 7430
},
{
"epoch": 21.08,
"learning_rate": 1.983157894736842e-05,
"loss": 1.4806,
"step": 7440
},
{
"epoch": 21.1,
"learning_rate": 1.9815789473684213e-05,
"loss": 1.4688,
"step": 7450
},
{
"epoch": 21.13,
"learning_rate": 1.98e-05,
"loss": 1.484,
"step": 7460
},
{
"epoch": 21.16,
"learning_rate": 1.978421052631579e-05,
"loss": 1.4733,
"step": 7470
},
{
"epoch": 21.19,
"learning_rate": 1.9768421052631577e-05,
"loss": 1.4781,
"step": 7480
},
{
"epoch": 21.22,
"learning_rate": 1.9752631578947368e-05,
"loss": 1.4683,
"step": 7490
},
{
"epoch": 21.25,
"learning_rate": 1.9736842105263158e-05,
"loss": 1.481,
"step": 7500
},
{
"epoch": 21.27,
"learning_rate": 1.972105263157895e-05,
"loss": 1.4658,
"step": 7510
},
{
"epoch": 21.3,
"learning_rate": 1.970526315789474e-05,
"loss": 1.4639,
"step": 7520
},
{
"epoch": 21.33,
"learning_rate": 1.9689473684210526e-05,
"loss": 1.4731,
"step": 7530
},
{
"epoch": 21.36,
"learning_rate": 1.9673684210526316e-05,
"loss": 1.4683,
"step": 7540
},
{
"epoch": 21.39,
"learning_rate": 1.9657894736842106e-05,
"loss": 1.4687,
"step": 7550
},
{
"epoch": 21.42,
"learning_rate": 1.9642105263157897e-05,
"loss": 1.4824,
"step": 7560
},
{
"epoch": 21.44,
"learning_rate": 1.9626315789473684e-05,
"loss": 1.47,
"step": 7570
},
{
"epoch": 21.47,
"learning_rate": 1.9610526315789474e-05,
"loss": 1.4842,
"step": 7580
},
{
"epoch": 21.5,
"learning_rate": 1.9594736842105264e-05,
"loss": 1.4659,
"step": 7590
},
{
"epoch": 21.53,
"learning_rate": 1.9578947368421055e-05,
"loss": 1.4776,
"step": 7600
},
{
"epoch": 21.56,
"learning_rate": 1.956315789473684e-05,
"loss": 1.4647,
"step": 7610
},
{
"epoch": 21.58,
"learning_rate": 1.9547368421052632e-05,
"loss": 1.5014,
"step": 7620
},
{
"epoch": 21.61,
"learning_rate": 1.953157894736842e-05,
"loss": 1.4737,
"step": 7630
},
{
"epoch": 21.64,
"learning_rate": 1.9515789473684213e-05,
"loss": 1.4659,
"step": 7640
},
{
"epoch": 21.67,
"learning_rate": 1.95e-05,
"loss": 1.4784,
"step": 7650
},
{
"epoch": 21.7,
"learning_rate": 1.948421052631579e-05,
"loss": 1.4617,
"step": 7660
},
{
"epoch": 21.73,
"learning_rate": 1.946842105263158e-05,
"loss": 1.4775,
"step": 7670
},
{
"epoch": 21.75,
"learning_rate": 1.9452631578947367e-05,
"loss": 1.4701,
"step": 7680
},
{
"epoch": 21.78,
"learning_rate": 1.943684210526316e-05,
"loss": 1.4719,
"step": 7690
},
{
"epoch": 21.81,
"learning_rate": 1.9421052631578948e-05,
"loss": 1.4584,
"step": 7700
},
{
"epoch": 21.84,
"learning_rate": 1.9405263157894738e-05,
"loss": 1.479,
"step": 7710
},
{
"epoch": 21.87,
"learning_rate": 1.9389473684210525e-05,
"loss": 1.4675,
"step": 7720
},
{
"epoch": 21.9,
"learning_rate": 1.937368421052632e-05,
"loss": 1.4719,
"step": 7730
},
{
"epoch": 21.92,
"learning_rate": 1.9357894736842106e-05,
"loss": 1.4812,
"step": 7740
},
{
"epoch": 21.95,
"learning_rate": 1.9342105263157896e-05,
"loss": 1.484,
"step": 7750
},
{
"epoch": 21.98,
"learning_rate": 1.9326315789473683e-05,
"loss": 1.4798,
"step": 7760
},
{
"epoch": 22.01,
"learning_rate": 1.9310526315789474e-05,
"loss": 1.5941,
"step": 7770
},
{
"epoch": 22.04,
"learning_rate": 1.9294736842105264e-05,
"loss": 1.4656,
"step": 7780
},
{
"epoch": 22.07,
"learning_rate": 1.9278947368421054e-05,
"loss": 1.4704,
"step": 7790
},
{
"epoch": 22.1,
"learning_rate": 1.926315789473684e-05,
"loss": 1.468,
"step": 7800
},
{
"epoch": 22.12,
"learning_rate": 1.924736842105263e-05,
"loss": 1.4671,
"step": 7810
},
{
"epoch": 22.15,
"learning_rate": 1.923157894736842e-05,
"loss": 1.4543,
"step": 7820
},
{
"epoch": 22.18,
"learning_rate": 1.9215789473684212e-05,
"loss": 1.4557,
"step": 7830
},
{
"epoch": 22.21,
"learning_rate": 1.9200000000000003e-05,
"loss": 1.4602,
"step": 7840
},
{
"epoch": 22.24,
"learning_rate": 1.918421052631579e-05,
"loss": 1.4582,
"step": 7850
},
{
"epoch": 22.27,
"learning_rate": 1.916842105263158e-05,
"loss": 1.4604,
"step": 7860
},
{
"epoch": 22.29,
"learning_rate": 1.915263157894737e-05,
"loss": 1.4581,
"step": 7870
},
{
"epoch": 22.32,
"learning_rate": 1.913684210526316e-05,
"loss": 1.4616,
"step": 7880
},
{
"epoch": 22.35,
"learning_rate": 1.9121052631578947e-05,
"loss": 1.4587,
"step": 7890
},
{
"epoch": 22.38,
"learning_rate": 1.9105263157894738e-05,
"loss": 1.4643,
"step": 7900
},
{
"epoch": 22.41,
"learning_rate": 1.9089473684210525e-05,
"loss": 1.4661,
"step": 7910
},
{
"epoch": 22.44,
"learning_rate": 1.907368421052632e-05,
"loss": 1.4728,
"step": 7920
},
{
"epoch": 22.46,
"learning_rate": 1.9057894736842105e-05,
"loss": 1.4609,
"step": 7930
},
{
"epoch": 22.49,
"learning_rate": 1.9042105263157896e-05,
"loss": 1.4672,
"step": 7940
},
{
"epoch": 22.52,
"learning_rate": 1.9026315789473683e-05,
"loss": 1.4643,
"step": 7950
},
{
"epoch": 22.55,
"learning_rate": 1.9010526315789473e-05,
"loss": 1.4592,
"step": 7960
},
{
"epoch": 22.58,
"learning_rate": 1.8994736842105263e-05,
"loss": 1.4795,
"step": 7970
},
{
"epoch": 22.6,
"learning_rate": 1.8978947368421054e-05,
"loss": 1.4669,
"step": 7980
},
{
"epoch": 22.63,
"learning_rate": 1.896315789473684e-05,
"loss": 1.4639,
"step": 7990
},
{
"epoch": 22.66,
"learning_rate": 1.894736842105263e-05,
"loss": 1.4649,
"step": 8000
},
{
"epoch": 22.66,
"eval_denotation_accuracy": 0.5694101024373013,
"eval_loss": 2.1465563774108887,
"eval_runtime": 367.8906,
"eval_samples_per_second": 7.695,
"eval_steps_per_second": 1.924,
"step": 8000
},
{
"epoch": 22.69,
"learning_rate": 1.8931578947368425e-05,
"loss": 1.4677,
"step": 8010
},
{
"epoch": 22.72,
"learning_rate": 1.891578947368421e-05,
"loss": 1.4773,
"step": 8020
},
{
"epoch": 22.75,
"learning_rate": 1.8900000000000002e-05,
"loss": 1.4968,
"step": 8030
},
{
"epoch": 22.77,
"learning_rate": 1.888421052631579e-05,
"loss": 1.4555,
"step": 8040
},
{
"epoch": 22.8,
"learning_rate": 1.886842105263158e-05,
"loss": 1.4749,
"step": 8050
},
{
"epoch": 22.83,
"learning_rate": 1.885263157894737e-05,
"loss": 1.4622,
"step": 8060
},
{
"epoch": 22.86,
"learning_rate": 1.883684210526316e-05,
"loss": 1.4731,
"step": 8070
},
{
"epoch": 22.89,
"learning_rate": 1.8821052631578947e-05,
"loss": 1.4608,
"step": 8080
},
{
"epoch": 22.92,
"learning_rate": 1.8805263157894737e-05,
"loss": 1.4608,
"step": 8090
},
{
"epoch": 22.94,
"learning_rate": 1.8789473684210524e-05,
"loss": 1.4561,
"step": 8100
},
{
"epoch": 22.97,
"learning_rate": 1.8773684210526318e-05,
"loss": 1.4714,
"step": 8110
},
{
"epoch": 23.0,
"learning_rate": 1.8757894736842105e-05,
"loss": 1.5981,
"step": 8120
},
{
"epoch": 23.03,
"learning_rate": 1.8742105263157895e-05,
"loss": 1.4634,
"step": 8130
},
{
"epoch": 23.06,
"learning_rate": 1.8726315789473682e-05,
"loss": 1.4691,
"step": 8140
},
{
"epoch": 23.09,
"learning_rate": 1.8710526315789476e-05,
"loss": 1.4606,
"step": 8150
},
{
"epoch": 23.12,
"learning_rate": 1.8694736842105263e-05,
"loss": 1.4549,
"step": 8160
},
{
"epoch": 23.14,
"learning_rate": 1.8678947368421053e-05,
"loss": 1.4595,
"step": 8170
},
{
"epoch": 23.17,
"learning_rate": 1.8663157894736844e-05,
"loss": 1.4512,
"step": 8180
},
{
"epoch": 23.2,
"learning_rate": 1.864736842105263e-05,
"loss": 1.4609,
"step": 8190
},
{
"epoch": 23.23,
"learning_rate": 1.8631578947368424e-05,
"loss": 1.4571,
"step": 8200
},
{
"epoch": 23.26,
"learning_rate": 1.861578947368421e-05,
"loss": 1.455,
"step": 8210
},
{
"epoch": 23.29,
"learning_rate": 1.86e-05,
"loss": 1.4581,
"step": 8220
},
{
"epoch": 23.31,
"learning_rate": 1.858421052631579e-05,
"loss": 1.4637,
"step": 8230
},
{
"epoch": 23.34,
"learning_rate": 1.856842105263158e-05,
"loss": 1.4472,
"step": 8240
},
{
"epoch": 23.37,
"learning_rate": 1.855263157894737e-05,
"loss": 1.4485,
"step": 8250
},
{
"epoch": 23.4,
"learning_rate": 1.853684210526316e-05,
"loss": 1.4692,
"step": 8260
},
{
"epoch": 23.43,
"learning_rate": 1.8521052631578946e-05,
"loss": 1.4647,
"step": 8270
},
{
"epoch": 23.45,
"learning_rate": 1.8505263157894737e-05,
"loss": 1.4576,
"step": 8280
},
{
"epoch": 23.48,
"learning_rate": 1.8489473684210527e-05,
"loss": 1.4552,
"step": 8290
},
{
"epoch": 23.51,
"learning_rate": 1.8473684210526317e-05,
"loss": 1.4576,
"step": 8300
},
{
"epoch": 23.54,
"learning_rate": 1.8457894736842104e-05,
"loss": 1.4523,
"step": 8310
},
{
"epoch": 23.57,
"learning_rate": 1.8442105263157895e-05,
"loss": 1.4652,
"step": 8320
},
{
"epoch": 23.6,
"learning_rate": 1.842631578947368e-05,
"loss": 1.4659,
"step": 8330
},
{
"epoch": 23.62,
"learning_rate": 1.8410526315789475e-05,
"loss": 1.4585,
"step": 8340
},
{
"epoch": 23.65,
"learning_rate": 1.8394736842105266e-05,
"loss": 1.4553,
"step": 8350
},
{
"epoch": 23.68,
"learning_rate": 1.8378947368421053e-05,
"loss": 1.4472,
"step": 8360
},
{
"epoch": 23.71,
"learning_rate": 1.8363157894736843e-05,
"loss": 1.4547,
"step": 8370
},
{
"epoch": 23.74,
"learning_rate": 1.834736842105263e-05,
"loss": 1.4645,
"step": 8380
},
{
"epoch": 23.77,
"learning_rate": 1.8331578947368424e-05,
"loss": 1.4682,
"step": 8390
},
{
"epoch": 23.79,
"learning_rate": 1.831578947368421e-05,
"loss": 1.4623,
"step": 8400
},
{
"epoch": 23.82,
"learning_rate": 1.83e-05,
"loss": 1.4597,
"step": 8410
},
{
"epoch": 23.85,
"learning_rate": 1.8284210526315788e-05,
"loss": 1.4626,
"step": 8420
},
{
"epoch": 23.88,
"learning_rate": 1.826842105263158e-05,
"loss": 1.4676,
"step": 8430
},
{
"epoch": 23.91,
"learning_rate": 1.825263157894737e-05,
"loss": 1.464,
"step": 8440
},
{
"epoch": 23.94,
"learning_rate": 1.823684210526316e-05,
"loss": 1.4632,
"step": 8450
},
{
"epoch": 23.96,
"learning_rate": 1.8221052631578946e-05,
"loss": 1.4781,
"step": 8460
},
{
"epoch": 23.99,
"learning_rate": 1.8205263157894736e-05,
"loss": 1.4641,
"step": 8470
},
{
"epoch": 24.02,
"learning_rate": 1.8189473684210527e-05,
"loss": 1.5895,
"step": 8480
},
{
"epoch": 24.05,
"learning_rate": 1.8173684210526317e-05,
"loss": 1.4559,
"step": 8490
},
{
"epoch": 24.08,
"learning_rate": 1.8157894736842107e-05,
"loss": 1.455,
"step": 8500
},
{
"epoch": 24.11,
"learning_rate": 1.8142105263157894e-05,
"loss": 1.4503,
"step": 8510
},
{
"epoch": 24.14,
"learning_rate": 1.8126315789473685e-05,
"loss": 1.4536,
"step": 8520
},
{
"epoch": 24.16,
"learning_rate": 1.8110526315789475e-05,
"loss": 1.4639,
"step": 8530
},
{
"epoch": 24.19,
"learning_rate": 1.8094736842105265e-05,
"loss": 1.4554,
"step": 8540
},
{
"epoch": 24.22,
"learning_rate": 1.8078947368421052e-05,
"loss": 1.4527,
"step": 8550
},
{
"epoch": 24.25,
"learning_rate": 1.8063157894736842e-05,
"loss": 1.4564,
"step": 8560
},
{
"epoch": 24.28,
"learning_rate": 1.8047368421052633e-05,
"loss": 1.4552,
"step": 8570
},
{
"epoch": 24.31,
"learning_rate": 1.8031578947368423e-05,
"loss": 1.4553,
"step": 8580
},
{
"epoch": 24.33,
"learning_rate": 1.801578947368421e-05,
"loss": 1.4624,
"step": 8590
},
{
"epoch": 24.36,
"learning_rate": 1.8e-05,
"loss": 1.4666,
"step": 8600
},
{
"epoch": 24.39,
"learning_rate": 1.7984210526315787e-05,
"loss": 1.483,
"step": 8610
},
{
"epoch": 24.42,
"learning_rate": 1.796842105263158e-05,
"loss": 1.4497,
"step": 8620
},
{
"epoch": 24.45,
"learning_rate": 1.7952631578947368e-05,
"loss": 1.4499,
"step": 8630
},
{
"epoch": 24.47,
"learning_rate": 1.793684210526316e-05,
"loss": 1.4621,
"step": 8640
},
{
"epoch": 24.5,
"learning_rate": 1.7921052631578945e-05,
"loss": 1.4563,
"step": 8650
},
{
"epoch": 24.53,
"learning_rate": 1.7905263157894736e-05,
"loss": 1.4604,
"step": 8660
},
{
"epoch": 24.56,
"learning_rate": 1.788947368421053e-05,
"loss": 1.459,
"step": 8670
},
{
"epoch": 24.59,
"learning_rate": 1.7873684210526316e-05,
"loss": 1.4535,
"step": 8680
},
{
"epoch": 24.62,
"learning_rate": 1.7857894736842107e-05,
"loss": 1.4607,
"step": 8690
},
{
"epoch": 24.64,
"learning_rate": 1.7842105263157894e-05,
"loss": 1.4631,
"step": 8700
},
{
"epoch": 24.67,
"learning_rate": 1.7826315789473687e-05,
"loss": 1.4546,
"step": 8710
},
{
"epoch": 24.7,
"learning_rate": 1.7810526315789474e-05,
"loss": 1.4585,
"step": 8720
},
{
"epoch": 24.73,
"learning_rate": 1.7794736842105265e-05,
"loss": 1.4513,
"step": 8730
},
{
"epoch": 24.76,
"learning_rate": 1.777894736842105e-05,
"loss": 1.4654,
"step": 8740
},
{
"epoch": 24.79,
"learning_rate": 1.7763157894736842e-05,
"loss": 1.469,
"step": 8750
},
{
"epoch": 24.81,
"learning_rate": 1.7747368421052632e-05,
"loss": 1.4568,
"step": 8760
},
{
"epoch": 24.84,
"learning_rate": 1.7731578947368423e-05,
"loss": 1.4563,
"step": 8770
},
{
"epoch": 24.87,
"learning_rate": 1.771578947368421e-05,
"loss": 1.4524,
"step": 8780
},
{
"epoch": 24.9,
"learning_rate": 1.77e-05,
"loss": 1.4634,
"step": 8790
},
{
"epoch": 24.93,
"learning_rate": 1.7684210526315787e-05,
"loss": 1.4664,
"step": 8800
},
{
"epoch": 24.96,
"learning_rate": 1.766842105263158e-05,
"loss": 1.4586,
"step": 8810
},
{
"epoch": 24.98,
"learning_rate": 1.7652631578947368e-05,
"loss": 1.4597,
"step": 8820
},
{
"epoch": 25.01,
"learning_rate": 1.7636842105263158e-05,
"loss": 1.5807,
"step": 8830
},
{
"epoch": 25.04,
"learning_rate": 1.7621052631578948e-05,
"loss": 1.4547,
"step": 8840
},
{
"epoch": 25.07,
"learning_rate": 1.760526315789474e-05,
"loss": 1.457,
"step": 8850
},
{
"epoch": 25.1,
"learning_rate": 1.758947368421053e-05,
"loss": 1.4622,
"step": 8860
},
{
"epoch": 25.13,
"learning_rate": 1.7573684210526316e-05,
"loss": 1.4571,
"step": 8870
},
{
"epoch": 25.16,
"learning_rate": 1.7557894736842106e-05,
"loss": 1.4649,
"step": 8880
},
{
"epoch": 25.18,
"learning_rate": 1.7542105263157893e-05,
"loss": 1.4504,
"step": 8890
},
{
"epoch": 25.21,
"learning_rate": 1.7526315789473687e-05,
"loss": 1.4478,
"step": 8900
},
{
"epoch": 25.24,
"learning_rate": 1.7510526315789474e-05,
"loss": 1.4472,
"step": 8910
},
{
"epoch": 25.27,
"learning_rate": 1.7494736842105264e-05,
"loss": 1.4518,
"step": 8920
},
{
"epoch": 25.3,
"learning_rate": 1.747894736842105e-05,
"loss": 1.4583,
"step": 8930
},
{
"epoch": 25.32,
"learning_rate": 1.7463157894736845e-05,
"loss": 1.4494,
"step": 8940
},
{
"epoch": 25.35,
"learning_rate": 1.7447368421052632e-05,
"loss": 1.4479,
"step": 8950
},
{
"epoch": 25.38,
"learning_rate": 1.7431578947368422e-05,
"loss": 1.4529,
"step": 8960
},
{
"epoch": 25.41,
"learning_rate": 1.741578947368421e-05,
"loss": 1.4584,
"step": 8970
},
{
"epoch": 25.44,
"learning_rate": 1.74e-05,
"loss": 1.4568,
"step": 8980
},
{
"epoch": 25.47,
"learning_rate": 1.738421052631579e-05,
"loss": 1.4552,
"step": 8990
},
{
"epoch": 25.49,
"learning_rate": 1.736842105263158e-05,
"loss": 1.4578,
"step": 9000
},
{
"epoch": 25.49,
"eval_denotation_accuracy": 0.5655245496291064,
"eval_loss": 2.1795523166656494,
"eval_runtime": 384.2494,
"eval_samples_per_second": 7.368,
"eval_steps_per_second": 1.843,
"step": 9000
},
{
"epoch": 25.52,
"learning_rate": 1.735263157894737e-05,
"loss": 1.4569,
"step": 9010
},
{
"epoch": 25.55,
"learning_rate": 1.7336842105263157e-05,
"loss": 1.4494,
"step": 9020
},
{
"epoch": 25.58,
"learning_rate": 1.7321052631578948e-05,
"loss": 1.4565,
"step": 9030
},
{
"epoch": 25.61,
"learning_rate": 1.7305263157894738e-05,
"loss": 1.4538,
"step": 9040
},
{
"epoch": 25.64,
"learning_rate": 1.728947368421053e-05,
"loss": 1.4593,
"step": 9050
},
{
"epoch": 25.66,
"learning_rate": 1.7273684210526315e-05,
"loss": 1.4494,
"step": 9060
},
{
"epoch": 25.69,
"learning_rate": 1.7257894736842106e-05,
"loss": 1.4544,
"step": 9070
},
{
"epoch": 25.72,
"learning_rate": 1.7242105263157896e-05,
"loss": 1.4545,
"step": 9080
},
{
"epoch": 25.75,
"learning_rate": 1.7226315789473686e-05,
"loss": 1.4551,
"step": 9090
},
{
"epoch": 25.78,
"learning_rate": 1.7210526315789473e-05,
"loss": 1.4566,
"step": 9100
},
{
"epoch": 25.81,
"learning_rate": 1.7194736842105264e-05,
"loss": 1.4476,
"step": 9110
},
{
"epoch": 25.83,
"learning_rate": 1.717894736842105e-05,
"loss": 1.4564,
"step": 9120
},
{
"epoch": 25.86,
"learning_rate": 1.7163157894736844e-05,
"loss": 1.4504,
"step": 9130
},
{
"epoch": 25.89,
"learning_rate": 1.714736842105263e-05,
"loss": 1.4464,
"step": 9140
},
{
"epoch": 25.92,
"learning_rate": 1.713157894736842e-05,
"loss": 1.4613,
"step": 9150
},
{
"epoch": 25.95,
"learning_rate": 1.711578947368421e-05,
"loss": 1.452,
"step": 9160
},
{
"epoch": 25.97,
"learning_rate": 1.71e-05,
"loss": 1.4672,
"step": 9170
},
{
"epoch": 26.01,
"learning_rate": 1.7084210526315793e-05,
"loss": 1.5794,
"step": 9180
},
{
"epoch": 26.03,
"learning_rate": 1.706842105263158e-05,
"loss": 1.4436,
"step": 9190
},
{
"epoch": 26.06,
"learning_rate": 1.705263157894737e-05,
"loss": 1.4512,
"step": 9200
},
{
"epoch": 26.09,
"learning_rate": 1.7036842105263157e-05,
"loss": 1.4582,
"step": 9210
},
{
"epoch": 26.12,
"learning_rate": 1.702105263157895e-05,
"loss": 1.4483,
"step": 9220
},
{
"epoch": 26.15,
"learning_rate": 1.7005263157894738e-05,
"loss": 1.4564,
"step": 9230
},
{
"epoch": 26.18,
"learning_rate": 1.6989473684210528e-05,
"loss": 1.4485,
"step": 9240
},
{
"epoch": 26.2,
"learning_rate": 1.6973684210526315e-05,
"loss": 1.4552,
"step": 9250
},
{
"epoch": 26.23,
"learning_rate": 1.6957894736842105e-05,
"loss": 1.4425,
"step": 9260
},
{
"epoch": 26.26,
"learning_rate": 1.6942105263157896e-05,
"loss": 1.4505,
"step": 9270
},
{
"epoch": 26.29,
"learning_rate": 1.6926315789473686e-05,
"loss": 1.4553,
"step": 9280
},
{
"epoch": 26.32,
"learning_rate": 1.6910526315789473e-05,
"loss": 1.4532,
"step": 9290
},
{
"epoch": 26.34,
"learning_rate": 1.6894736842105263e-05,
"loss": 1.4607,
"step": 9300
},
{
"epoch": 26.37,
"learning_rate": 1.687894736842105e-05,
"loss": 1.4574,
"step": 9310
},
{
"epoch": 26.4,
"learning_rate": 1.6863157894736844e-05,
"loss": 1.4539,
"step": 9320
},
{
"epoch": 26.43,
"learning_rate": 1.684736842105263e-05,
"loss": 1.4613,
"step": 9330
},
{
"epoch": 26.46,
"learning_rate": 1.683157894736842e-05,
"loss": 1.4561,
"step": 9340
},
{
"epoch": 26.49,
"learning_rate": 1.681578947368421e-05,
"loss": 1.4589,
"step": 9350
},
{
"epoch": 26.51,
"learning_rate": 1.6800000000000002e-05,
"loss": 1.4499,
"step": 9360
},
{
"epoch": 26.54,
"learning_rate": 1.6784210526315792e-05,
"loss": 1.4518,
"step": 9370
},
{
"epoch": 26.57,
"learning_rate": 1.676842105263158e-05,
"loss": 1.4559,
"step": 9380
},
{
"epoch": 26.6,
"learning_rate": 1.675263157894737e-05,
"loss": 1.4573,
"step": 9390
},
{
"epoch": 26.63,
"learning_rate": 1.6736842105263156e-05,
"loss": 1.4822,
"step": 9400
},
{
"epoch": 26.66,
"learning_rate": 1.672105263157895e-05,
"loss": 1.4559,
"step": 9410
},
{
"epoch": 26.68,
"learning_rate": 1.6705263157894737e-05,
"loss": 1.4518,
"step": 9420
},
{
"epoch": 26.71,
"learning_rate": 1.6689473684210527e-05,
"loss": 1.4506,
"step": 9430
},
{
"epoch": 26.74,
"learning_rate": 1.6673684210526314e-05,
"loss": 1.4456,
"step": 9440
},
{
"epoch": 26.77,
"learning_rate": 1.6657894736842105e-05,
"loss": 1.4601,
"step": 9450
},
{
"epoch": 26.8,
"learning_rate": 1.6642105263157895e-05,
"loss": 1.453,
"step": 9460
},
{
"epoch": 26.83,
"learning_rate": 1.6626315789473685e-05,
"loss": 1.4559,
"step": 9470
},
{
"epoch": 26.85,
"learning_rate": 1.6610526315789472e-05,
"loss": 1.4578,
"step": 9480
},
{
"epoch": 26.88,
"learning_rate": 1.6594736842105263e-05,
"loss": 1.4607,
"step": 9490
},
{
"epoch": 26.91,
"learning_rate": 1.6578947368421053e-05,
"loss": 1.4576,
"step": 9500
},
{
"epoch": 26.94,
"learning_rate": 1.6563157894736843e-05,
"loss": 1.4611,
"step": 9510
},
{
"epoch": 26.97,
"learning_rate": 1.6547368421052634e-05,
"loss": 1.4605,
"step": 9520
},
{
"epoch": 26.99,
"learning_rate": 1.653157894736842e-05,
"loss": 1.4587,
"step": 9530
},
{
"epoch": 27.03,
"learning_rate": 1.651578947368421e-05,
"loss": 1.587,
"step": 9540
},
{
"epoch": 27.05,
"learning_rate": 1.65e-05,
"loss": 1.4476,
"step": 9550
},
{
"epoch": 27.08,
"learning_rate": 1.648421052631579e-05,
"loss": 1.4579,
"step": 9560
},
{
"epoch": 27.11,
"learning_rate": 1.646842105263158e-05,
"loss": 1.4519,
"step": 9570
},
{
"epoch": 27.14,
"learning_rate": 1.645263157894737e-05,
"loss": 1.4559,
"step": 9580
},
{
"epoch": 27.17,
"learning_rate": 1.6436842105263156e-05,
"loss": 1.4634,
"step": 9590
},
{
"epoch": 27.19,
"learning_rate": 1.642105263157895e-05,
"loss": 1.4613,
"step": 9600
},
{
"epoch": 27.22,
"learning_rate": 1.6405263157894737e-05,
"loss": 1.4512,
"step": 9610
},
{
"epoch": 27.25,
"learning_rate": 1.6389473684210527e-05,
"loss": 1.4537,
"step": 9620
},
{
"epoch": 27.28,
"learning_rate": 1.6373684210526314e-05,
"loss": 1.4579,
"step": 9630
},
{
"epoch": 27.31,
"learning_rate": 1.6357894736842108e-05,
"loss": 1.4478,
"step": 9640
},
{
"epoch": 27.34,
"learning_rate": 1.6342105263157894e-05,
"loss": 1.468,
"step": 9650
},
{
"epoch": 27.36,
"learning_rate": 1.6326315789473685e-05,
"loss": 1.4684,
"step": 9660
},
{
"epoch": 27.39,
"learning_rate": 1.6310526315789475e-05,
"loss": 1.4503,
"step": 9670
},
{
"epoch": 27.42,
"learning_rate": 1.6294736842105262e-05,
"loss": 1.4523,
"step": 9680
},
{
"epoch": 27.45,
"learning_rate": 1.6278947368421056e-05,
"loss": 1.4587,
"step": 9690
},
{
"epoch": 27.48,
"learning_rate": 1.6263157894736843e-05,
"loss": 1.4599,
"step": 9700
},
{
"epoch": 27.51,
"learning_rate": 1.6247368421052633e-05,
"loss": 1.4459,
"step": 9710
},
{
"epoch": 27.53,
"learning_rate": 1.623157894736842e-05,
"loss": 1.4492,
"step": 9720
},
{
"epoch": 27.56,
"learning_rate": 1.621578947368421e-05,
"loss": 1.4537,
"step": 9730
},
{
"epoch": 27.59,
"learning_rate": 1.62e-05,
"loss": 1.4507,
"step": 9740
},
{
"epoch": 27.62,
"learning_rate": 1.618421052631579e-05,
"loss": 1.4546,
"step": 9750
},
{
"epoch": 27.65,
"learning_rate": 1.6168421052631578e-05,
"loss": 1.4561,
"step": 9760
},
{
"epoch": 27.68,
"learning_rate": 1.615263157894737e-05,
"loss": 1.4472,
"step": 9770
},
{
"epoch": 27.7,
"learning_rate": 1.613684210526316e-05,
"loss": 1.4475,
"step": 9780
},
{
"epoch": 27.73,
"learning_rate": 1.612105263157895e-05,
"loss": 1.4499,
"step": 9790
},
{
"epoch": 27.76,
"learning_rate": 1.6105263157894736e-05,
"loss": 1.4549,
"step": 9800
},
{
"epoch": 27.79,
"learning_rate": 1.6089473684210526e-05,
"loss": 1.4532,
"step": 9810
},
{
"epoch": 27.82,
"learning_rate": 1.6073684210526313e-05,
"loss": 1.4622,
"step": 9820
},
{
"epoch": 27.84,
"learning_rate": 1.6057894736842107e-05,
"loss": 1.4459,
"step": 9830
},
{
"epoch": 27.87,
"learning_rate": 1.6042105263157897e-05,
"loss": 1.4534,
"step": 9840
},
{
"epoch": 27.9,
"learning_rate": 1.6026315789473684e-05,
"loss": 1.4538,
"step": 9850
},
{
"epoch": 27.93,
"learning_rate": 1.6010526315789475e-05,
"loss": 1.4614,
"step": 9860
},
{
"epoch": 27.96,
"learning_rate": 1.599473684210526e-05,
"loss": 1.4608,
"step": 9870
},
{
"epoch": 27.99,
"learning_rate": 1.5978947368421055e-05,
"loss": 1.4524,
"step": 9880
},
{
"epoch": 28.02,
"learning_rate": 1.5963157894736842e-05,
"loss": 1.5717,
"step": 9890
},
{
"epoch": 28.05,
"learning_rate": 1.5947368421052633e-05,
"loss": 1.4465,
"step": 9900
},
{
"epoch": 28.07,
"learning_rate": 1.593157894736842e-05,
"loss": 1.4488,
"step": 9910
},
{
"epoch": 28.1,
"learning_rate": 1.5915789473684213e-05,
"loss": 1.4412,
"step": 9920
},
{
"epoch": 28.13,
"learning_rate": 1.59e-05,
"loss": 1.4391,
"step": 9930
},
{
"epoch": 28.16,
"learning_rate": 1.588421052631579e-05,
"loss": 1.4502,
"step": 9940
},
{
"epoch": 28.19,
"learning_rate": 1.5868421052631578e-05,
"loss": 1.4522,
"step": 9950
},
{
"epoch": 28.21,
"learning_rate": 1.5852631578947368e-05,
"loss": 1.446,
"step": 9960
},
{
"epoch": 28.24,
"learning_rate": 1.5836842105263158e-05,
"loss": 1.4494,
"step": 9970
},
{
"epoch": 28.27,
"learning_rate": 1.582105263157895e-05,
"loss": 1.4492,
"step": 9980
},
{
"epoch": 28.3,
"learning_rate": 1.5805263157894735e-05,
"loss": 1.4441,
"step": 9990
},
{
"epoch": 28.33,
"learning_rate": 1.5789473684210526e-05,
"loss": 1.45,
"step": 10000
},
{
"epoch": 28.33,
"eval_denotation_accuracy": 0.5655245496291064,
"eval_loss": 2.1741864681243896,
"eval_runtime": 343.9405,
"eval_samples_per_second": 8.231,
"eval_steps_per_second": 2.058,
"step": 10000
},
{
"epoch": 28.36,
"learning_rate": 1.5773684210526316e-05,
"loss": 1.4492,
"step": 10010
},
{
"epoch": 28.38,
"learning_rate": 1.5757894736842107e-05,
"loss": 1.4592,
"step": 10020
},
{
"epoch": 28.41,
"learning_rate": 1.5742105263157897e-05,
"loss": 1.4474,
"step": 10030
},
{
"epoch": 28.44,
"learning_rate": 1.5726315789473684e-05,
"loss": 1.448,
"step": 10040
},
{
"epoch": 28.47,
"learning_rate": 1.5710526315789474e-05,
"loss": 1.4454,
"step": 10050
},
{
"epoch": 28.5,
"learning_rate": 1.5694736842105264e-05,
"loss": 1.4551,
"step": 10060
},
{
"epoch": 28.53,
"learning_rate": 1.5678947368421055e-05,
"loss": 1.444,
"step": 10070
},
{
"epoch": 28.55,
"learning_rate": 1.5663157894736842e-05,
"loss": 1.444,
"step": 10080
},
{
"epoch": 28.58,
"learning_rate": 1.5647368421052632e-05,
"loss": 1.4529,
"step": 10090
},
{
"epoch": 28.61,
"learning_rate": 1.563157894736842e-05,
"loss": 1.4558,
"step": 10100
},
{
"epoch": 28.64,
"learning_rate": 1.5615789473684213e-05,
"loss": 1.4422,
"step": 10110
},
{
"epoch": 28.67,
"learning_rate": 1.56e-05,
"loss": 1.4477,
"step": 10120
},
{
"epoch": 28.7,
"learning_rate": 1.558421052631579e-05,
"loss": 1.446,
"step": 10130
},
{
"epoch": 28.72,
"learning_rate": 1.5568421052631577e-05,
"loss": 1.4423,
"step": 10140
},
{
"epoch": 28.75,
"learning_rate": 1.5552631578947367e-05,
"loss": 1.4442,
"step": 10150
},
{
"epoch": 28.78,
"learning_rate": 1.5536842105263158e-05,
"loss": 1.4479,
"step": 10160
},
{
"epoch": 28.81,
"learning_rate": 1.5521052631578948e-05,
"loss": 1.4556,
"step": 10170
},
{
"epoch": 28.84,
"learning_rate": 1.550526315789474e-05,
"loss": 1.4418,
"step": 10180
},
{
"epoch": 28.86,
"learning_rate": 1.5489473684210525e-05,
"loss": 1.451,
"step": 10190
},
{
"epoch": 28.89,
"learning_rate": 1.547368421052632e-05,
"loss": 1.4399,
"step": 10200
},
{
"epoch": 28.92,
"learning_rate": 1.5457894736842106e-05,
"loss": 1.4433,
"step": 10210
},
{
"epoch": 28.95,
"learning_rate": 1.5442105263157896e-05,
"loss": 1.4507,
"step": 10220
},
{
"epoch": 28.98,
"learning_rate": 1.5426315789473683e-05,
"loss": 1.4389,
"step": 10230
},
{
"epoch": 29.01,
"learning_rate": 1.5410526315789474e-05,
"loss": 1.568,
"step": 10240
},
{
"epoch": 29.04,
"learning_rate": 1.5394736842105264e-05,
"loss": 1.4508,
"step": 10250
},
{
"epoch": 29.06,
"learning_rate": 1.5378947368421054e-05,
"loss": 1.4691,
"step": 10260
},
{
"epoch": 29.09,
"learning_rate": 1.536315789473684e-05,
"loss": 1.4477,
"step": 10270
},
{
"epoch": 29.12,
"learning_rate": 1.534736842105263e-05,
"loss": 1.4421,
"step": 10280
},
{
"epoch": 29.15,
"learning_rate": 1.533157894736842e-05,
"loss": 1.4447,
"step": 10290
},
{
"epoch": 29.18,
"learning_rate": 1.5315789473684212e-05,
"loss": 1.4369,
"step": 10300
},
{
"epoch": 29.21,
"learning_rate": 1.53e-05,
"loss": 1.447,
"step": 10310
},
{
"epoch": 29.23,
"learning_rate": 1.528421052631579e-05,
"loss": 1.4429,
"step": 10320
},
{
"epoch": 29.26,
"learning_rate": 1.5268421052631576e-05,
"loss": 1.445,
"step": 10330
},
{
"epoch": 29.29,
"learning_rate": 1.525263157894737e-05,
"loss": 1.4455,
"step": 10340
},
{
"epoch": 29.32,
"learning_rate": 1.5236842105263159e-05,
"loss": 1.4524,
"step": 10350
},
{
"epoch": 29.35,
"learning_rate": 1.5221052631578948e-05,
"loss": 1.4439,
"step": 10360
},
{
"epoch": 29.38,
"learning_rate": 1.5205263157894736e-05,
"loss": 1.4389,
"step": 10370
},
{
"epoch": 29.4,
"learning_rate": 1.5189473684210525e-05,
"loss": 1.4435,
"step": 10380
},
{
"epoch": 29.43,
"learning_rate": 1.5173684210526317e-05,
"loss": 1.4401,
"step": 10390
},
{
"epoch": 29.46,
"learning_rate": 1.5157894736842105e-05,
"loss": 1.4424,
"step": 10400
},
{
"epoch": 29.49,
"learning_rate": 1.5142105263157894e-05,
"loss": 1.4546,
"step": 10410
},
{
"epoch": 29.52,
"learning_rate": 1.5126315789473684e-05,
"loss": 1.4429,
"step": 10420
},
{
"epoch": 29.55,
"learning_rate": 1.5110526315789473e-05,
"loss": 1.4474,
"step": 10430
},
{
"epoch": 29.57,
"learning_rate": 1.5094736842105265e-05,
"loss": 1.4457,
"step": 10440
},
{
"epoch": 29.6,
"learning_rate": 1.5078947368421054e-05,
"loss": 1.4507,
"step": 10450
},
{
"epoch": 29.63,
"learning_rate": 1.5063157894736842e-05,
"loss": 1.4445,
"step": 10460
},
{
"epoch": 29.66,
"learning_rate": 1.5047368421052631e-05,
"loss": 1.4514,
"step": 10470
},
{
"epoch": 29.69,
"learning_rate": 1.5031578947368423e-05,
"loss": 1.443,
"step": 10480
},
{
"epoch": 29.71,
"learning_rate": 1.5015789473684212e-05,
"loss": 1.4455,
"step": 10490
},
{
"epoch": 29.74,
"learning_rate": 1.5e-05,
"loss": 1.4482,
"step": 10500
},
{
"epoch": 29.77,
"learning_rate": 1.4984210526315789e-05,
"loss": 1.4545,
"step": 10510
},
{
"epoch": 29.8,
"learning_rate": 1.496842105263158e-05,
"loss": 1.4469,
"step": 10520
},
{
"epoch": 29.83,
"learning_rate": 1.4952631578947368e-05,
"loss": 1.4595,
"step": 10530
},
{
"epoch": 29.86,
"learning_rate": 1.4936842105263158e-05,
"loss": 1.4502,
"step": 10540
},
{
"epoch": 29.88,
"learning_rate": 1.4921052631578947e-05,
"loss": 1.4431,
"step": 10550
},
{
"epoch": 29.91,
"learning_rate": 1.4905263157894737e-05,
"loss": 1.4417,
"step": 10560
},
{
"epoch": 29.94,
"learning_rate": 1.4889473684210526e-05,
"loss": 1.4488,
"step": 10570
},
{
"epoch": 29.97,
"learning_rate": 1.4873684210526315e-05,
"loss": 1.438,
"step": 10580
},
{
"epoch": 30.0,
"learning_rate": 1.4857894736842107e-05,
"loss": 1.4436,
"step": 10590
},
{
"epoch": 30.03,
"learning_rate": 1.4842105263157895e-05,
"loss": 1.5746,
"step": 10600
},
{
"epoch": 30.06,
"learning_rate": 1.4826315789473686e-05,
"loss": 1.4384,
"step": 10610
},
{
"epoch": 30.08,
"learning_rate": 1.4810526315789474e-05,
"loss": 1.4467,
"step": 10620
},
{
"epoch": 30.11,
"learning_rate": 1.4794736842105265e-05,
"loss": 1.4427,
"step": 10630
},
{
"epoch": 30.14,
"learning_rate": 1.4778947368421053e-05,
"loss": 1.442,
"step": 10640
},
{
"epoch": 30.17,
"learning_rate": 1.4763157894736842e-05,
"loss": 1.4459,
"step": 10650
},
{
"epoch": 30.2,
"learning_rate": 1.4747368421052632e-05,
"loss": 1.4418,
"step": 10660
},
{
"epoch": 30.23,
"learning_rate": 1.4731578947368421e-05,
"loss": 1.4442,
"step": 10670
},
{
"epoch": 30.25,
"learning_rate": 1.4715789473684211e-05,
"loss": 1.441,
"step": 10680
},
{
"epoch": 30.28,
"learning_rate": 1.47e-05,
"loss": 1.4451,
"step": 10690
},
{
"epoch": 30.31,
"learning_rate": 1.468421052631579e-05,
"loss": 1.441,
"step": 10700
},
{
"epoch": 30.34,
"learning_rate": 1.4668421052631579e-05,
"loss": 1.4487,
"step": 10710
},
{
"epoch": 30.37,
"learning_rate": 1.4652631578947367e-05,
"loss": 1.4508,
"step": 10720
},
{
"epoch": 30.4,
"learning_rate": 1.4636842105263158e-05,
"loss": 1.4441,
"step": 10730
},
{
"epoch": 30.42,
"learning_rate": 1.4621052631578946e-05,
"loss": 1.4405,
"step": 10740
},
{
"epoch": 30.45,
"learning_rate": 1.4605263157894737e-05,
"loss": 1.4432,
"step": 10750
},
{
"epoch": 30.48,
"learning_rate": 1.4589473684210527e-05,
"loss": 1.4416,
"step": 10760
},
{
"epoch": 30.51,
"learning_rate": 1.4573684210526317e-05,
"loss": 1.4374,
"step": 10770
},
{
"epoch": 30.54,
"learning_rate": 1.4557894736842106e-05,
"loss": 1.4431,
"step": 10780
},
{
"epoch": 30.57,
"learning_rate": 1.4542105263157895e-05,
"loss": 1.4433,
"step": 10790
},
{
"epoch": 30.59,
"learning_rate": 1.4526315789473685e-05,
"loss": 1.4387,
"step": 10800
},
{
"epoch": 30.62,
"learning_rate": 1.4510526315789474e-05,
"loss": 1.4442,
"step": 10810
},
{
"epoch": 30.65,
"learning_rate": 1.4494736842105264e-05,
"loss": 1.4588,
"step": 10820
},
{
"epoch": 30.68,
"learning_rate": 1.4478947368421053e-05,
"loss": 1.4447,
"step": 10830
},
{
"epoch": 30.71,
"learning_rate": 1.4463157894736843e-05,
"loss": 1.4411,
"step": 10840
},
{
"epoch": 30.73,
"learning_rate": 1.4447368421052632e-05,
"loss": 1.4487,
"step": 10850
},
{
"epoch": 30.76,
"learning_rate": 1.443157894736842e-05,
"loss": 1.4416,
"step": 10860
},
{
"epoch": 30.79,
"learning_rate": 1.441578947368421e-05,
"loss": 1.4445,
"step": 10870
},
{
"epoch": 30.82,
"learning_rate": 1.44e-05,
"loss": 1.448,
"step": 10880
},
{
"epoch": 30.85,
"learning_rate": 1.438421052631579e-05,
"loss": 1.4417,
"step": 10890
},
{
"epoch": 30.88,
"learning_rate": 1.4368421052631578e-05,
"loss": 1.4531,
"step": 10900
},
{
"epoch": 30.9,
"learning_rate": 1.4352631578947369e-05,
"loss": 1.442,
"step": 10910
},
{
"epoch": 30.93,
"learning_rate": 1.4336842105263159e-05,
"loss": 1.4445,
"step": 10920
},
{
"epoch": 30.96,
"learning_rate": 1.4321052631578948e-05,
"loss": 1.4446,
"step": 10930
},
{
"epoch": 30.99,
"learning_rate": 1.4305263157894738e-05,
"loss": 1.4354,
"step": 10940
},
{
"epoch": 31.02,
"learning_rate": 1.4289473684210527e-05,
"loss": 1.5688,
"step": 10950
},
{
"epoch": 31.05,
"learning_rate": 1.4273684210526317e-05,
"loss": 1.4468,
"step": 10960
},
{
"epoch": 31.08,
"learning_rate": 1.4257894736842106e-05,
"loss": 1.4484,
"step": 10970
},
{
"epoch": 31.1,
"learning_rate": 1.4242105263157896e-05,
"loss": 1.4418,
"step": 10980
},
{
"epoch": 31.13,
"learning_rate": 1.4226315789473685e-05,
"loss": 1.4436,
"step": 10990
},
{
"epoch": 31.16,
"learning_rate": 1.4210526315789473e-05,
"loss": 1.4387,
"step": 11000
},
{
"epoch": 31.16,
"eval_denotation_accuracy": 0.5754150476863299,
"eval_loss": 2.172001600265503,
"eval_runtime": 356.2446,
"eval_samples_per_second": 7.947,
"eval_steps_per_second": 1.987,
"step": 11000
},
{
"epoch": 31.19,
"learning_rate": 1.4194736842105264e-05,
"loss": 1.4366,
"step": 11010
},
{
"epoch": 31.22,
"learning_rate": 1.4178947368421052e-05,
"loss": 1.4424,
"step": 11020
},
{
"epoch": 31.25,
"learning_rate": 1.4163157894736843e-05,
"loss": 1.4401,
"step": 11030
},
{
"epoch": 31.27,
"learning_rate": 1.4147368421052631e-05,
"loss": 1.4421,
"step": 11040
},
{
"epoch": 31.3,
"learning_rate": 1.4131578947368422e-05,
"loss": 1.4503,
"step": 11050
},
{
"epoch": 31.33,
"learning_rate": 1.411578947368421e-05,
"loss": 1.44,
"step": 11060
},
{
"epoch": 31.36,
"learning_rate": 1.4099999999999999e-05,
"loss": 1.4547,
"step": 11070
},
{
"epoch": 31.39,
"learning_rate": 1.408421052631579e-05,
"loss": 1.4453,
"step": 11080
},
{
"epoch": 31.42,
"learning_rate": 1.406842105263158e-05,
"loss": 1.4466,
"step": 11090
},
{
"epoch": 31.44,
"learning_rate": 1.405263157894737e-05,
"loss": 1.4418,
"step": 11100
},
{
"epoch": 31.47,
"learning_rate": 1.4036842105263158e-05,
"loss": 1.4453,
"step": 11110
},
{
"epoch": 31.5,
"learning_rate": 1.4021052631578949e-05,
"loss": 1.4463,
"step": 11120
},
{
"epoch": 31.53,
"learning_rate": 1.4005263157894737e-05,
"loss": 1.4414,
"step": 11130
},
{
"epoch": 31.56,
"learning_rate": 1.3989473684210526e-05,
"loss": 1.4417,
"step": 11140
},
{
"epoch": 31.58,
"learning_rate": 1.3973684210526316e-05,
"loss": 1.4382,
"step": 11150
},
{
"epoch": 31.61,
"learning_rate": 1.3957894736842105e-05,
"loss": 1.4421,
"step": 11160
},
{
"epoch": 31.64,
"learning_rate": 1.3942105263157895e-05,
"loss": 1.44,
"step": 11170
},
{
"epoch": 31.67,
"learning_rate": 1.3926315789473684e-05,
"loss": 1.4404,
"step": 11180
},
{
"epoch": 31.7,
"learning_rate": 1.3910526315789474e-05,
"loss": 1.4501,
"step": 11190
},
{
"epoch": 31.73,
"learning_rate": 1.3894736842105263e-05,
"loss": 1.4476,
"step": 11200
},
{
"epoch": 31.75,
"learning_rate": 1.3878947368421052e-05,
"loss": 1.4348,
"step": 11210
},
{
"epoch": 31.78,
"learning_rate": 1.3863157894736842e-05,
"loss": 1.4393,
"step": 11220
},
{
"epoch": 31.81,
"learning_rate": 1.384736842105263e-05,
"loss": 1.441,
"step": 11230
},
{
"epoch": 31.84,
"learning_rate": 1.3831578947368421e-05,
"loss": 1.4393,
"step": 11240
},
{
"epoch": 31.87,
"learning_rate": 1.3815789473684211e-05,
"loss": 1.4382,
"step": 11250
},
{
"epoch": 31.9,
"learning_rate": 1.3800000000000002e-05,
"loss": 1.4465,
"step": 11260
},
{
"epoch": 31.92,
"learning_rate": 1.378421052631579e-05,
"loss": 1.4413,
"step": 11270
},
{
"epoch": 31.95,
"learning_rate": 1.3768421052631579e-05,
"loss": 1.4435,
"step": 11280
},
{
"epoch": 31.98,
"learning_rate": 1.375263157894737e-05,
"loss": 1.4449,
"step": 11290
},
{
"epoch": 32.01,
"learning_rate": 1.3736842105263158e-05,
"loss": 1.5643,
"step": 11300
},
{
"epoch": 32.04,
"learning_rate": 1.3721052631578948e-05,
"loss": 1.4375,
"step": 11310
},
{
"epoch": 32.07,
"learning_rate": 1.3705263157894737e-05,
"loss": 1.4359,
"step": 11320
},
{
"epoch": 32.1,
"learning_rate": 1.3689473684210527e-05,
"loss": 1.4309,
"step": 11330
},
{
"epoch": 32.12,
"learning_rate": 1.3673684210526316e-05,
"loss": 1.4376,
"step": 11340
},
{
"epoch": 32.15,
"learning_rate": 1.3657894736842106e-05,
"loss": 1.436,
"step": 11350
},
{
"epoch": 32.18,
"learning_rate": 1.3642105263157895e-05,
"loss": 1.4455,
"step": 11360
},
{
"epoch": 32.21,
"learning_rate": 1.3626315789473684e-05,
"loss": 1.4416,
"step": 11370
},
{
"epoch": 32.24,
"learning_rate": 1.3610526315789474e-05,
"loss": 1.4381,
"step": 11380
},
{
"epoch": 32.27,
"learning_rate": 1.3594736842105263e-05,
"loss": 1.438,
"step": 11390
},
{
"epoch": 32.29,
"learning_rate": 1.3578947368421053e-05,
"loss": 1.4397,
"step": 11400
},
{
"epoch": 32.32,
"learning_rate": 1.3563157894736842e-05,
"loss": 1.4342,
"step": 11410
},
{
"epoch": 32.35,
"learning_rate": 1.3547368421052634e-05,
"loss": 1.4357,
"step": 11420
},
{
"epoch": 32.38,
"learning_rate": 1.3531578947368422e-05,
"loss": 1.4391,
"step": 11430
},
{
"epoch": 32.41,
"learning_rate": 1.3515789473684211e-05,
"loss": 1.4376,
"step": 11440
},
{
"epoch": 32.44,
"learning_rate": 1.3500000000000001e-05,
"loss": 1.4378,
"step": 11450
},
{
"epoch": 32.46,
"learning_rate": 1.348421052631579e-05,
"loss": 1.4369,
"step": 11460
},
{
"epoch": 32.49,
"learning_rate": 1.346842105263158e-05,
"loss": 1.4441,
"step": 11470
},
{
"epoch": 32.52,
"learning_rate": 1.3452631578947369e-05,
"loss": 1.448,
"step": 11480
},
{
"epoch": 32.55,
"learning_rate": 1.343684210526316e-05,
"loss": 1.4375,
"step": 11490
},
{
"epoch": 32.58,
"learning_rate": 1.3421052631578948e-05,
"loss": 1.4345,
"step": 11500
},
{
"epoch": 32.6,
"learning_rate": 1.3405263157894736e-05,
"loss": 1.4377,
"step": 11510
},
{
"epoch": 32.63,
"learning_rate": 1.3389473684210527e-05,
"loss": 1.4494,
"step": 11520
},
{
"epoch": 32.66,
"learning_rate": 1.3373684210526315e-05,
"loss": 1.4433,
"step": 11530
},
{
"epoch": 32.69,
"learning_rate": 1.3357894736842106e-05,
"loss": 1.4489,
"step": 11540
},
{
"epoch": 32.72,
"learning_rate": 1.3342105263157894e-05,
"loss": 1.4412,
"step": 11550
},
{
"epoch": 32.75,
"learning_rate": 1.3326315789473685e-05,
"loss": 1.4376,
"step": 11560
},
{
"epoch": 32.77,
"learning_rate": 1.3310526315789473e-05,
"loss": 1.4435,
"step": 11570
},
{
"epoch": 32.8,
"learning_rate": 1.3294736842105262e-05,
"loss": 1.4378,
"step": 11580
},
{
"epoch": 32.83,
"learning_rate": 1.3278947368421054e-05,
"loss": 1.4386,
"step": 11590
},
{
"epoch": 32.86,
"learning_rate": 1.3263157894736843e-05,
"loss": 1.4428,
"step": 11600
},
{
"epoch": 32.89,
"learning_rate": 1.3247368421052633e-05,
"loss": 1.4409,
"step": 11610
},
{
"epoch": 32.92,
"learning_rate": 1.3231578947368422e-05,
"loss": 1.441,
"step": 11620
},
{
"epoch": 32.94,
"learning_rate": 1.3215789473684212e-05,
"loss": 1.4356,
"step": 11630
},
{
"epoch": 32.97,
"learning_rate": 1.32e-05,
"loss": 1.4436,
"step": 11640
},
{
"epoch": 33.0,
"learning_rate": 1.318421052631579e-05,
"loss": 1.5665,
"step": 11650
},
{
"epoch": 33.03,
"learning_rate": 1.316842105263158e-05,
"loss": 1.434,
"step": 11660
},
{
"epoch": 33.06,
"learning_rate": 1.3152631578947368e-05,
"loss": 1.4356,
"step": 11670
},
{
"epoch": 33.09,
"learning_rate": 1.3136842105263159e-05,
"loss": 1.4409,
"step": 11680
},
{
"epoch": 33.12,
"learning_rate": 1.3121052631578947e-05,
"loss": 1.4436,
"step": 11690
},
{
"epoch": 33.14,
"learning_rate": 1.3105263157894738e-05,
"loss": 1.438,
"step": 11700
},
{
"epoch": 33.17,
"learning_rate": 1.3089473684210526e-05,
"loss": 1.4404,
"step": 11710
},
{
"epoch": 33.2,
"learning_rate": 1.3073684210526315e-05,
"loss": 1.4388,
"step": 11720
},
{
"epoch": 33.23,
"learning_rate": 1.3057894736842105e-05,
"loss": 1.4436,
"step": 11730
},
{
"epoch": 33.26,
"learning_rate": 1.3042105263157894e-05,
"loss": 1.4393,
"step": 11740
},
{
"epoch": 33.29,
"learning_rate": 1.3026315789473684e-05,
"loss": 1.4385,
"step": 11750
},
{
"epoch": 33.31,
"learning_rate": 1.3010526315789475e-05,
"loss": 1.4362,
"step": 11760
},
{
"epoch": 33.34,
"learning_rate": 1.2994736842105265e-05,
"loss": 1.4369,
"step": 11770
},
{
"epoch": 33.37,
"learning_rate": 1.2978947368421054e-05,
"loss": 1.435,
"step": 11780
},
{
"epoch": 33.4,
"learning_rate": 1.2963157894736842e-05,
"loss": 1.4342,
"step": 11790
},
{
"epoch": 33.43,
"learning_rate": 1.2947368421052633e-05,
"loss": 1.4362,
"step": 11800
},
{
"epoch": 33.45,
"learning_rate": 1.2931578947368421e-05,
"loss": 1.4349,
"step": 11810
},
{
"epoch": 33.48,
"learning_rate": 1.2915789473684212e-05,
"loss": 1.436,
"step": 11820
},
{
"epoch": 33.51,
"learning_rate": 1.29e-05,
"loss": 1.4361,
"step": 11830
},
{
"epoch": 33.54,
"learning_rate": 1.288421052631579e-05,
"loss": 1.4409,
"step": 11840
},
{
"epoch": 33.57,
"learning_rate": 1.2868421052631579e-05,
"loss": 1.4433,
"step": 11850
},
{
"epoch": 33.6,
"learning_rate": 1.2852631578947368e-05,
"loss": 1.4505,
"step": 11860
},
{
"epoch": 33.62,
"learning_rate": 1.2836842105263158e-05,
"loss": 1.4358,
"step": 11870
},
{
"epoch": 33.65,
"learning_rate": 1.2821052631578947e-05,
"loss": 1.442,
"step": 11880
},
{
"epoch": 33.68,
"learning_rate": 1.2805263157894737e-05,
"loss": 1.4447,
"step": 11890
},
{
"epoch": 33.71,
"learning_rate": 1.2789473684210526e-05,
"loss": 1.4462,
"step": 11900
},
{
"epoch": 33.74,
"learning_rate": 1.2773684210526316e-05,
"loss": 1.4398,
"step": 11910
},
{
"epoch": 33.77,
"learning_rate": 1.2757894736842106e-05,
"loss": 1.4486,
"step": 11920
},
{
"epoch": 33.79,
"learning_rate": 1.2742105263157895e-05,
"loss": 1.4388,
"step": 11930
},
{
"epoch": 33.82,
"learning_rate": 1.2726315789473685e-05,
"loss": 1.4432,
"step": 11940
},
{
"epoch": 33.85,
"learning_rate": 1.2710526315789474e-05,
"loss": 1.4357,
"step": 11950
},
{
"epoch": 33.88,
"learning_rate": 1.2694736842105264e-05,
"loss": 1.4354,
"step": 11960
},
{
"epoch": 33.91,
"learning_rate": 1.2678947368421053e-05,
"loss": 1.4359,
"step": 11970
},
{
"epoch": 33.94,
"learning_rate": 1.2663157894736843e-05,
"loss": 1.4386,
"step": 11980
},
{
"epoch": 33.96,
"learning_rate": 1.2647368421052632e-05,
"loss": 1.4405,
"step": 11990
},
{
"epoch": 33.99,
"learning_rate": 1.263157894736842e-05,
"loss": 1.4414,
"step": 12000
},
{
"epoch": 33.99,
"eval_denotation_accuracy": 0.5683504062168845,
"eval_loss": 2.178483009338379,
"eval_runtime": 358.5197,
"eval_samples_per_second": 7.896,
"eval_steps_per_second": 1.975,
"step": 12000
},
{
"epoch": 34.02,
"learning_rate": 1.2615789473684211e-05,
"loss": 1.563,
"step": 12010
},
{
"epoch": 34.05,
"learning_rate": 1.26e-05,
"loss": 1.4362,
"step": 12020
},
{
"epoch": 34.08,
"learning_rate": 1.258421052631579e-05,
"loss": 1.4417,
"step": 12030
},
{
"epoch": 34.11,
"learning_rate": 1.2568421052631579e-05,
"loss": 1.4401,
"step": 12040
},
{
"epoch": 34.14,
"learning_rate": 1.2552631578947369e-05,
"loss": 1.4399,
"step": 12050
},
{
"epoch": 34.16,
"learning_rate": 1.2536842105263158e-05,
"loss": 1.4323,
"step": 12060
},
{
"epoch": 34.19,
"learning_rate": 1.2521052631578946e-05,
"loss": 1.4393,
"step": 12070
},
{
"epoch": 34.22,
"learning_rate": 1.2505263157894737e-05,
"loss": 1.4359,
"step": 12080
},
{
"epoch": 34.25,
"learning_rate": 1.2489473684210527e-05,
"loss": 1.4371,
"step": 12090
},
{
"epoch": 34.28,
"learning_rate": 1.2473684210526317e-05,
"loss": 1.4342,
"step": 12100
},
{
"epoch": 34.31,
"learning_rate": 1.2457894736842106e-05,
"loss": 1.437,
"step": 12110
},
{
"epoch": 34.33,
"learning_rate": 1.2442105263157896e-05,
"loss": 1.4368,
"step": 12120
},
{
"epoch": 34.36,
"learning_rate": 1.2426315789473685e-05,
"loss": 1.4359,
"step": 12130
},
{
"epoch": 34.39,
"learning_rate": 1.2410526315789474e-05,
"loss": 1.4348,
"step": 12140
},
{
"epoch": 34.42,
"learning_rate": 1.2394736842105264e-05,
"loss": 1.4334,
"step": 12150
},
{
"epoch": 34.45,
"learning_rate": 1.2378947368421053e-05,
"loss": 1.4343,
"step": 12160
},
{
"epoch": 34.47,
"learning_rate": 1.2363157894736843e-05,
"loss": 1.4329,
"step": 12170
},
{
"epoch": 34.5,
"learning_rate": 1.2347368421052631e-05,
"loss": 1.434,
"step": 12180
},
{
"epoch": 34.53,
"learning_rate": 1.2331578947368422e-05,
"loss": 1.4382,
"step": 12190
},
{
"epoch": 34.56,
"learning_rate": 1.231578947368421e-05,
"loss": 1.433,
"step": 12200
},
{
"epoch": 34.59,
"learning_rate": 1.2299999999999999e-05,
"loss": 1.4331,
"step": 12210
},
{
"epoch": 34.62,
"learning_rate": 1.228421052631579e-05,
"loss": 1.4368,
"step": 12220
},
{
"epoch": 34.64,
"learning_rate": 1.2268421052631578e-05,
"loss": 1.4369,
"step": 12230
},
{
"epoch": 34.67,
"learning_rate": 1.2252631578947368e-05,
"loss": 1.4364,
"step": 12240
},
{
"epoch": 34.7,
"learning_rate": 1.2236842105263159e-05,
"loss": 1.4365,
"step": 12250
},
{
"epoch": 34.73,
"learning_rate": 1.2221052631578949e-05,
"loss": 1.4349,
"step": 12260
},
{
"epoch": 34.76,
"learning_rate": 1.2205263157894738e-05,
"loss": 1.4343,
"step": 12270
},
{
"epoch": 34.79,
"learning_rate": 1.2189473684210526e-05,
"loss": 1.4377,
"step": 12280
},
{
"epoch": 34.81,
"learning_rate": 1.2173684210526317e-05,
"loss": 1.4331,
"step": 12290
},
{
"epoch": 34.84,
"learning_rate": 1.2157894736842105e-05,
"loss": 1.4334,
"step": 12300
},
{
"epoch": 34.87,
"learning_rate": 1.2142105263157896e-05,
"loss": 1.4392,
"step": 12310
},
{
"epoch": 34.9,
"learning_rate": 1.2126315789473684e-05,
"loss": 1.4378,
"step": 12320
},
{
"epoch": 34.93,
"learning_rate": 1.2110526315789475e-05,
"loss": 1.4321,
"step": 12330
},
{
"epoch": 34.96,
"learning_rate": 1.2094736842105263e-05,
"loss": 1.4379,
"step": 12340
},
{
"epoch": 34.98,
"learning_rate": 1.2078947368421052e-05,
"loss": 1.4351,
"step": 12350
},
{
"epoch": 35.01,
"learning_rate": 1.2063157894736842e-05,
"loss": 1.5578,
"step": 12360
},
{
"epoch": 35.04,
"learning_rate": 1.2047368421052631e-05,
"loss": 1.4329,
"step": 12370
},
{
"epoch": 35.07,
"learning_rate": 1.2031578947368421e-05,
"loss": 1.4325,
"step": 12380
},
{
"epoch": 35.1,
"learning_rate": 1.201578947368421e-05,
"loss": 1.4342,
"step": 12390
},
{
"epoch": 35.13,
"learning_rate": 1.2e-05,
"loss": 1.4406,
"step": 12400
},
{
"epoch": 35.16,
"learning_rate": 1.1984210526315789e-05,
"loss": 1.4337,
"step": 12410
},
{
"epoch": 35.18,
"learning_rate": 1.196842105263158e-05,
"loss": 1.4424,
"step": 12420
},
{
"epoch": 35.21,
"learning_rate": 1.195263157894737e-05,
"loss": 1.4368,
"step": 12430
},
{
"epoch": 35.24,
"learning_rate": 1.1936842105263158e-05,
"loss": 1.4357,
"step": 12440
},
{
"epoch": 35.27,
"learning_rate": 1.1921052631578949e-05,
"loss": 1.4333,
"step": 12450
},
{
"epoch": 35.3,
"learning_rate": 1.1905263157894737e-05,
"loss": 1.4336,
"step": 12460
},
{
"epoch": 35.32,
"learning_rate": 1.1889473684210528e-05,
"loss": 1.4349,
"step": 12470
},
{
"epoch": 35.35,
"learning_rate": 1.1873684210526316e-05,
"loss": 1.4329,
"step": 12480
},
{
"epoch": 35.38,
"learning_rate": 1.1857894736842105e-05,
"loss": 1.4317,
"step": 12490
},
{
"epoch": 35.41,
"learning_rate": 1.1842105263157895e-05,
"loss": 1.4354,
"step": 12500
},
{
"epoch": 35.44,
"learning_rate": 1.1826315789473684e-05,
"loss": 1.4361,
"step": 12510
},
{
"epoch": 35.47,
"learning_rate": 1.1810526315789474e-05,
"loss": 1.4375,
"step": 12520
},
{
"epoch": 35.49,
"learning_rate": 1.1794736842105263e-05,
"loss": 1.4348,
"step": 12530
},
{
"epoch": 35.52,
"learning_rate": 1.1778947368421053e-05,
"loss": 1.4358,
"step": 12540
},
{
"epoch": 35.55,
"learning_rate": 1.1763157894736842e-05,
"loss": 1.4317,
"step": 12550
},
{
"epoch": 35.58,
"learning_rate": 1.174736842105263e-05,
"loss": 1.4334,
"step": 12560
},
{
"epoch": 35.61,
"learning_rate": 1.173157894736842e-05,
"loss": 1.4378,
"step": 12570
},
{
"epoch": 35.64,
"learning_rate": 1.171578947368421e-05,
"loss": 1.4345,
"step": 12580
},
{
"epoch": 35.66,
"learning_rate": 1.1700000000000001e-05,
"loss": 1.4424,
"step": 12590
},
{
"epoch": 35.69,
"learning_rate": 1.168421052631579e-05,
"loss": 1.437,
"step": 12600
},
{
"epoch": 35.72,
"learning_rate": 1.166842105263158e-05,
"loss": 1.4333,
"step": 12610
},
{
"epoch": 35.75,
"learning_rate": 1.1652631578947369e-05,
"loss": 1.4348,
"step": 12620
},
{
"epoch": 35.78,
"learning_rate": 1.1636842105263158e-05,
"loss": 1.4365,
"step": 12630
},
{
"epoch": 35.81,
"learning_rate": 1.1621052631578948e-05,
"loss": 1.4356,
"step": 12640
},
{
"epoch": 35.83,
"learning_rate": 1.1605263157894737e-05,
"loss": 1.4344,
"step": 12650
},
{
"epoch": 35.86,
"learning_rate": 1.1589473684210527e-05,
"loss": 1.4359,
"step": 12660
},
{
"epoch": 35.89,
"learning_rate": 1.1573684210526316e-05,
"loss": 1.4332,
"step": 12670
},
{
"epoch": 35.92,
"learning_rate": 1.1557894736842106e-05,
"loss": 1.437,
"step": 12680
},
{
"epoch": 35.95,
"learning_rate": 1.1542105263157895e-05,
"loss": 1.4394,
"step": 12690
},
{
"epoch": 35.97,
"learning_rate": 1.1526315789473683e-05,
"loss": 1.4346,
"step": 12700
},
{
"epoch": 36.01,
"learning_rate": 1.1510526315789474e-05,
"loss": 1.5572,
"step": 12710
},
{
"epoch": 36.03,
"learning_rate": 1.1494736842105262e-05,
"loss": 1.433,
"step": 12720
},
{
"epoch": 36.06,
"learning_rate": 1.1478947368421053e-05,
"loss": 1.4303,
"step": 12730
},
{
"epoch": 36.09,
"learning_rate": 1.1463157894736841e-05,
"loss": 1.4337,
"step": 12740
},
{
"epoch": 36.12,
"learning_rate": 1.1447368421052632e-05,
"loss": 1.4343,
"step": 12750
},
{
"epoch": 36.15,
"learning_rate": 1.1431578947368422e-05,
"loss": 1.4361,
"step": 12760
},
{
"epoch": 36.18,
"learning_rate": 1.141578947368421e-05,
"loss": 1.4425,
"step": 12770
},
{
"epoch": 36.2,
"learning_rate": 1.1400000000000001e-05,
"loss": 1.4347,
"step": 12780
},
{
"epoch": 36.23,
"learning_rate": 1.138421052631579e-05,
"loss": 1.4311,
"step": 12790
},
{
"epoch": 36.26,
"learning_rate": 1.136842105263158e-05,
"loss": 1.4336,
"step": 12800
},
{
"epoch": 36.29,
"learning_rate": 1.1352631578947369e-05,
"loss": 1.4356,
"step": 12810
},
{
"epoch": 36.32,
"learning_rate": 1.1336842105263159e-05,
"loss": 1.4348,
"step": 12820
},
{
"epoch": 36.34,
"learning_rate": 1.1321052631578948e-05,
"loss": 1.4379,
"step": 12830
},
{
"epoch": 36.37,
"learning_rate": 1.1305263157894736e-05,
"loss": 1.438,
"step": 12840
},
{
"epoch": 36.4,
"learning_rate": 1.1289473684210527e-05,
"loss": 1.4303,
"step": 12850
},
{
"epoch": 36.43,
"learning_rate": 1.1273684210526315e-05,
"loss": 1.4343,
"step": 12860
},
{
"epoch": 36.46,
"learning_rate": 1.1257894736842106e-05,
"loss": 1.4314,
"step": 12870
},
{
"epoch": 36.49,
"learning_rate": 1.1242105263157894e-05,
"loss": 1.4331,
"step": 12880
},
{
"epoch": 36.51,
"learning_rate": 1.1226315789473685e-05,
"loss": 1.4341,
"step": 12890
},
{
"epoch": 36.54,
"learning_rate": 1.1210526315789473e-05,
"loss": 1.4332,
"step": 12900
},
{
"epoch": 36.57,
"learning_rate": 1.1194736842105264e-05,
"loss": 1.4327,
"step": 12910
},
{
"epoch": 36.6,
"learning_rate": 1.1178947368421054e-05,
"loss": 1.4338,
"step": 12920
},
{
"epoch": 36.63,
"learning_rate": 1.1163157894736842e-05,
"loss": 1.4302,
"step": 12930
},
{
"epoch": 36.66,
"learning_rate": 1.1147368421052633e-05,
"loss": 1.4345,
"step": 12940
},
{
"epoch": 36.68,
"learning_rate": 1.1131578947368421e-05,
"loss": 1.4332,
"step": 12950
},
{
"epoch": 36.71,
"learning_rate": 1.1115789473684212e-05,
"loss": 1.4319,
"step": 12960
},
{
"epoch": 36.74,
"learning_rate": 1.11e-05,
"loss": 1.4384,
"step": 12970
},
{
"epoch": 36.77,
"learning_rate": 1.108421052631579e-05,
"loss": 1.4363,
"step": 12980
},
{
"epoch": 36.8,
"learning_rate": 1.106842105263158e-05,
"loss": 1.4329,
"step": 12990
},
{
"epoch": 36.83,
"learning_rate": 1.1052631578947368e-05,
"loss": 1.431,
"step": 13000
},
{
"epoch": 36.83,
"eval_denotation_accuracy": 0.571529494878135,
"eval_loss": 2.1800432205200195,
"eval_runtime": 373.5447,
"eval_samples_per_second": 7.579,
"eval_steps_per_second": 1.895,
"step": 13000
},
{
"epoch": 36.85,
"learning_rate": 1.1036842105263158e-05,
"loss": 1.433,
"step": 13010
},
{
"epoch": 36.88,
"learning_rate": 1.1021052631578947e-05,
"loss": 1.4333,
"step": 13020
},
{
"epoch": 36.91,
"learning_rate": 1.1005263157894737e-05,
"loss": 1.4356,
"step": 13030
},
{
"epoch": 36.94,
"learning_rate": 1.0989473684210526e-05,
"loss": 1.434,
"step": 13040
},
{
"epoch": 36.97,
"learning_rate": 1.0973684210526316e-05,
"loss": 1.432,
"step": 13050
},
{
"epoch": 36.99,
"learning_rate": 1.0957894736842105e-05,
"loss": 1.4389,
"step": 13060
},
{
"epoch": 37.03,
"learning_rate": 1.0942105263157894e-05,
"loss": 1.5555,
"step": 13070
},
{
"epoch": 37.05,
"learning_rate": 1.0926315789473684e-05,
"loss": 1.4337,
"step": 13080
},
{
"epoch": 37.08,
"learning_rate": 1.0910526315789474e-05,
"loss": 1.4387,
"step": 13090
},
{
"epoch": 37.11,
"learning_rate": 1.0894736842105265e-05,
"loss": 1.4395,
"step": 13100
},
{
"epoch": 37.14,
"learning_rate": 1.0878947368421053e-05,
"loss": 1.442,
"step": 13110
},
{
"epoch": 37.17,
"learning_rate": 1.0863157894736844e-05,
"loss": 1.4446,
"step": 13120
},
{
"epoch": 37.19,
"learning_rate": 1.0847368421052632e-05,
"loss": 1.4369,
"step": 13130
},
{
"epoch": 37.22,
"learning_rate": 1.0831578947368421e-05,
"loss": 1.4341,
"step": 13140
},
{
"epoch": 37.25,
"learning_rate": 1.0815789473684211e-05,
"loss": 1.4337,
"step": 13150
},
{
"epoch": 37.28,
"learning_rate": 1.08e-05,
"loss": 1.4353,
"step": 13160
},
{
"epoch": 37.31,
"learning_rate": 1.078421052631579e-05,
"loss": 1.4331,
"step": 13170
},
{
"epoch": 37.34,
"learning_rate": 1.0768421052631579e-05,
"loss": 1.4359,
"step": 13180
},
{
"epoch": 37.36,
"learning_rate": 1.075263157894737e-05,
"loss": 1.4319,
"step": 13190
},
{
"epoch": 37.39,
"learning_rate": 1.0736842105263158e-05,
"loss": 1.4307,
"step": 13200
},
{
"epoch": 37.42,
"learning_rate": 1.0721052631578947e-05,
"loss": 1.4306,
"step": 13210
},
{
"epoch": 37.45,
"learning_rate": 1.0705263157894737e-05,
"loss": 1.4331,
"step": 13220
},
{
"epoch": 37.48,
"learning_rate": 1.0689473684210526e-05,
"loss": 1.4386,
"step": 13230
},
{
"epoch": 37.51,
"learning_rate": 1.0673684210526316e-05,
"loss": 1.4362,
"step": 13240
},
{
"epoch": 37.53,
"learning_rate": 1.0657894736842106e-05,
"loss": 1.4335,
"step": 13250
},
{
"epoch": 37.56,
"learning_rate": 1.0642105263157897e-05,
"loss": 1.4323,
"step": 13260
},
{
"epoch": 37.59,
"learning_rate": 1.0626315789473685e-05,
"loss": 1.4331,
"step": 13270
},
{
"epoch": 37.62,
"learning_rate": 1.0610526315789474e-05,
"loss": 1.431,
"step": 13280
},
{
"epoch": 37.65,
"learning_rate": 1.0594736842105264e-05,
"loss": 1.4342,
"step": 13290
},
{
"epoch": 37.68,
"learning_rate": 1.0578947368421053e-05,
"loss": 1.4299,
"step": 13300
},
{
"epoch": 37.7,
"learning_rate": 1.0563157894736843e-05,
"loss": 1.4336,
"step": 13310
},
{
"epoch": 37.73,
"learning_rate": 1.0547368421052632e-05,
"loss": 1.4364,
"step": 13320
},
{
"epoch": 37.76,
"learning_rate": 1.0531578947368422e-05,
"loss": 1.4352,
"step": 13330
},
{
"epoch": 37.79,
"learning_rate": 1.051578947368421e-05,
"loss": 1.435,
"step": 13340
},
{
"epoch": 37.82,
"learning_rate": 1.05e-05,
"loss": 1.4351,
"step": 13350
},
{
"epoch": 37.84,
"learning_rate": 1.048421052631579e-05,
"loss": 1.4322,
"step": 13360
},
{
"epoch": 37.87,
"learning_rate": 1.0468421052631578e-05,
"loss": 1.4318,
"step": 13370
},
{
"epoch": 37.9,
"learning_rate": 1.0452631578947369e-05,
"loss": 1.4326,
"step": 13380
},
{
"epoch": 37.93,
"learning_rate": 1.0436842105263157e-05,
"loss": 1.4351,
"step": 13390
},
{
"epoch": 37.96,
"learning_rate": 1.0421052631578948e-05,
"loss": 1.4349,
"step": 13400
},
{
"epoch": 37.99,
"learning_rate": 1.0405263157894736e-05,
"loss": 1.4353,
"step": 13410
},
{
"epoch": 38.02,
"learning_rate": 1.0389473684210527e-05,
"loss": 1.557,
"step": 13420
},
{
"epoch": 38.05,
"learning_rate": 1.0373684210526317e-05,
"loss": 1.4375,
"step": 13430
},
{
"epoch": 38.07,
"learning_rate": 1.0357894736842106e-05,
"loss": 1.4359,
"step": 13440
},
{
"epoch": 38.1,
"learning_rate": 1.0342105263157896e-05,
"loss": 1.4353,
"step": 13450
},
{
"epoch": 38.13,
"learning_rate": 1.0326315789473685e-05,
"loss": 1.4326,
"step": 13460
},
{
"epoch": 38.16,
"learning_rate": 1.0310526315789475e-05,
"loss": 1.4343,
"step": 13470
},
{
"epoch": 38.19,
"learning_rate": 1.0294736842105264e-05,
"loss": 1.4347,
"step": 13480
},
{
"epoch": 38.21,
"learning_rate": 1.0278947368421052e-05,
"loss": 1.4322,
"step": 13490
},
{
"epoch": 38.24,
"learning_rate": 1.0263157894736843e-05,
"loss": 1.4324,
"step": 13500
},
{
"epoch": 38.27,
"learning_rate": 1.0247368421052631e-05,
"loss": 1.4313,
"step": 13510
},
{
"epoch": 38.3,
"learning_rate": 1.0231578947368422e-05,
"loss": 1.4285,
"step": 13520
},
{
"epoch": 38.33,
"learning_rate": 1.021578947368421e-05,
"loss": 1.4301,
"step": 13530
},
{
"epoch": 38.36,
"learning_rate": 1.02e-05,
"loss": 1.4311,
"step": 13540
},
{
"epoch": 38.38,
"learning_rate": 1.018421052631579e-05,
"loss": 1.4426,
"step": 13550
},
{
"epoch": 38.41,
"learning_rate": 1.0168421052631578e-05,
"loss": 1.433,
"step": 13560
},
{
"epoch": 38.44,
"learning_rate": 1.0152631578947368e-05,
"loss": 1.4326,
"step": 13570
},
{
"epoch": 38.47,
"learning_rate": 1.0136842105263157e-05,
"loss": 1.4286,
"step": 13580
},
{
"epoch": 38.5,
"learning_rate": 1.0121052631578949e-05,
"loss": 1.4321,
"step": 13590
},
{
"epoch": 38.53,
"learning_rate": 1.0105263157894738e-05,
"loss": 1.4361,
"step": 13600
},
{
"epoch": 38.55,
"learning_rate": 1.0089473684210528e-05,
"loss": 1.4319,
"step": 13610
},
{
"epoch": 38.58,
"learning_rate": 1.0073684210526317e-05,
"loss": 1.431,
"step": 13620
},
{
"epoch": 38.61,
"learning_rate": 1.0057894736842105e-05,
"loss": 1.4281,
"step": 13630
},
{
"epoch": 38.64,
"learning_rate": 1.0042105263157896e-05,
"loss": 1.4335,
"step": 13640
},
{
"epoch": 38.67,
"learning_rate": 1.0026315789473684e-05,
"loss": 1.4363,
"step": 13650
},
{
"epoch": 38.7,
"learning_rate": 1.0010526315789474e-05,
"loss": 1.4297,
"step": 13660
},
{
"epoch": 38.72,
"learning_rate": 9.994736842105263e-06,
"loss": 1.4328,
"step": 13670
},
{
"epoch": 38.75,
"learning_rate": 9.978947368421053e-06,
"loss": 1.4329,
"step": 13680
},
{
"epoch": 38.78,
"learning_rate": 9.963157894736842e-06,
"loss": 1.4306,
"step": 13690
},
{
"epoch": 38.81,
"learning_rate": 9.94736842105263e-06,
"loss": 1.4342,
"step": 13700
},
{
"epoch": 38.84,
"learning_rate": 9.931578947368421e-06,
"loss": 1.4362,
"step": 13710
},
{
"epoch": 38.86,
"learning_rate": 9.91578947368421e-06,
"loss": 1.4336,
"step": 13720
},
{
"epoch": 38.89,
"learning_rate": 9.9e-06,
"loss": 1.439,
"step": 13730
},
{
"epoch": 38.92,
"learning_rate": 9.884210526315789e-06,
"loss": 1.4341,
"step": 13740
},
{
"epoch": 38.95,
"learning_rate": 9.868421052631579e-06,
"loss": 1.4328,
"step": 13750
},
{
"epoch": 38.98,
"learning_rate": 9.85263157894737e-06,
"loss": 1.4378,
"step": 13760
},
{
"epoch": 39.01,
"learning_rate": 9.836842105263158e-06,
"loss": 1.5636,
"step": 13770
},
{
"epoch": 39.04,
"learning_rate": 9.821052631578948e-06,
"loss": 1.4318,
"step": 13780
},
{
"epoch": 39.06,
"learning_rate": 9.805263157894737e-06,
"loss": 1.4361,
"step": 13790
},
{
"epoch": 39.09,
"learning_rate": 9.789473684210527e-06,
"loss": 1.4359,
"step": 13800
},
{
"epoch": 39.12,
"learning_rate": 9.773684210526316e-06,
"loss": 1.4312,
"step": 13810
},
{
"epoch": 39.15,
"learning_rate": 9.757894736842106e-06,
"loss": 1.4304,
"step": 13820
},
{
"epoch": 39.18,
"learning_rate": 9.742105263157895e-06,
"loss": 1.4334,
"step": 13830
},
{
"epoch": 39.21,
"learning_rate": 9.726315789473684e-06,
"loss": 1.4335,
"step": 13840
},
{
"epoch": 39.23,
"learning_rate": 9.710526315789474e-06,
"loss": 1.4316,
"step": 13850
},
{
"epoch": 39.26,
"learning_rate": 9.694736842105263e-06,
"loss": 1.4287,
"step": 13860
},
{
"epoch": 39.29,
"learning_rate": 9.678947368421053e-06,
"loss": 1.4313,
"step": 13870
},
{
"epoch": 39.32,
"learning_rate": 9.663157894736842e-06,
"loss": 1.4315,
"step": 13880
},
{
"epoch": 39.35,
"learning_rate": 9.647368421052632e-06,
"loss": 1.4299,
"step": 13890
},
{
"epoch": 39.38,
"learning_rate": 9.63157894736842e-06,
"loss": 1.4298,
"step": 13900
},
{
"epoch": 39.4,
"learning_rate": 9.61578947368421e-06,
"loss": 1.4294,
"step": 13910
},
{
"epoch": 39.43,
"learning_rate": 9.600000000000001e-06,
"loss": 1.4292,
"step": 13920
},
{
"epoch": 39.46,
"learning_rate": 9.58421052631579e-06,
"loss": 1.4318,
"step": 13930
},
{
"epoch": 39.49,
"learning_rate": 9.56842105263158e-06,
"loss": 1.4333,
"step": 13940
},
{
"epoch": 39.52,
"learning_rate": 9.552631578947369e-06,
"loss": 1.4315,
"step": 13950
},
{
"epoch": 39.55,
"learning_rate": 9.53684210526316e-06,
"loss": 1.4308,
"step": 13960
},
{
"epoch": 39.57,
"learning_rate": 9.521052631578948e-06,
"loss": 1.4315,
"step": 13970
},
{
"epoch": 39.6,
"learning_rate": 9.505263157894737e-06,
"loss": 1.4305,
"step": 13980
},
{
"epoch": 39.63,
"learning_rate": 9.489473684210527e-06,
"loss": 1.4323,
"step": 13990
},
{
"epoch": 39.66,
"learning_rate": 9.473684210526315e-06,
"loss": 1.4319,
"step": 14000
},
{
"epoch": 39.66,
"eval_denotation_accuracy": 0.5778876722006359,
"eval_loss": 2.177828311920166,
"eval_runtime": 341.1576,
"eval_samples_per_second": 8.298,
"eval_steps_per_second": 2.075,
"step": 14000
},
{
"epoch": 39.69,
"learning_rate": 9.457894736842106e-06,
"loss": 1.4318,
"step": 14010
},
{
"epoch": 39.71,
"learning_rate": 9.442105263157894e-06,
"loss": 1.4328,
"step": 14020
},
{
"epoch": 39.74,
"learning_rate": 9.426315789473685e-06,
"loss": 1.4336,
"step": 14030
},
{
"epoch": 39.77,
"learning_rate": 9.410526315789473e-06,
"loss": 1.4325,
"step": 14040
},
{
"epoch": 39.8,
"learning_rate": 9.394736842105262e-06,
"loss": 1.4311,
"step": 14050
},
{
"epoch": 39.83,
"learning_rate": 9.378947368421052e-06,
"loss": 1.4327,
"step": 14060
},
{
"epoch": 39.86,
"learning_rate": 9.363157894736841e-06,
"loss": 1.4325,
"step": 14070
},
{
"epoch": 39.88,
"learning_rate": 9.347368421052631e-06,
"loss": 1.4332,
"step": 14080
},
{
"epoch": 39.91,
"learning_rate": 9.331578947368422e-06,
"loss": 1.4337,
"step": 14090
},
{
"epoch": 39.94,
"learning_rate": 9.315789473684212e-06,
"loss": 1.4315,
"step": 14100
},
{
"epoch": 39.97,
"learning_rate": 9.3e-06,
"loss": 1.4296,
"step": 14110
},
{
"epoch": 40.0,
"learning_rate": 9.28421052631579e-06,
"loss": 1.4475,
"step": 14120
},
{
"epoch": 40.03,
"learning_rate": 9.26842105263158e-06,
"loss": 1.5554,
"step": 14130
},
{
"epoch": 40.06,
"learning_rate": 9.252631578947368e-06,
"loss": 1.4305,
"step": 14140
},
{
"epoch": 40.08,
"learning_rate": 9.236842105263159e-06,
"loss": 1.4331,
"step": 14150
},
{
"epoch": 40.11,
"learning_rate": 9.221052631578947e-06,
"loss": 1.4349,
"step": 14160
},
{
"epoch": 40.14,
"learning_rate": 9.205263157894738e-06,
"loss": 1.4333,
"step": 14170
},
{
"epoch": 40.17,
"learning_rate": 9.189473684210526e-06,
"loss": 1.4289,
"step": 14180
},
{
"epoch": 40.2,
"learning_rate": 9.173684210526315e-06,
"loss": 1.4321,
"step": 14190
},
{
"epoch": 40.23,
"learning_rate": 9.157894736842105e-06,
"loss": 1.435,
"step": 14200
},
{
"epoch": 40.25,
"learning_rate": 9.142105263157894e-06,
"loss": 1.4276,
"step": 14210
},
{
"epoch": 40.28,
"learning_rate": 9.126315789473684e-06,
"loss": 1.4337,
"step": 14220
},
{
"epoch": 40.31,
"learning_rate": 9.110526315789473e-06,
"loss": 1.4326,
"step": 14230
},
{
"epoch": 40.34,
"learning_rate": 9.094736842105263e-06,
"loss": 1.4431,
"step": 14240
},
{
"epoch": 40.37,
"learning_rate": 9.078947368421054e-06,
"loss": 1.4355,
"step": 14250
},
{
"epoch": 40.4,
"learning_rate": 9.063157894736842e-06,
"loss": 1.43,
"step": 14260
},
{
"epoch": 40.42,
"learning_rate": 9.047368421052633e-06,
"loss": 1.4316,
"step": 14270
},
{
"epoch": 40.45,
"learning_rate": 9.031578947368421e-06,
"loss": 1.4322,
"step": 14280
},
{
"epoch": 40.48,
"learning_rate": 9.015789473684212e-06,
"loss": 1.4332,
"step": 14290
},
{
"epoch": 40.51,
"learning_rate": 9e-06,
"loss": 1.4298,
"step": 14300
},
{
"epoch": 40.54,
"learning_rate": 8.98421052631579e-06,
"loss": 1.4304,
"step": 14310
},
{
"epoch": 40.57,
"learning_rate": 8.96842105263158e-06,
"loss": 1.433,
"step": 14320
},
{
"epoch": 40.59,
"learning_rate": 8.952631578947368e-06,
"loss": 1.4295,
"step": 14330
},
{
"epoch": 40.62,
"learning_rate": 8.936842105263158e-06,
"loss": 1.4269,
"step": 14340
},
{
"epoch": 40.65,
"learning_rate": 8.921052631578947e-06,
"loss": 1.4351,
"step": 14350
},
{
"epoch": 40.68,
"learning_rate": 8.905263157894737e-06,
"loss": 1.4289,
"step": 14360
},
{
"epoch": 40.71,
"learning_rate": 8.889473684210526e-06,
"loss": 1.4306,
"step": 14370
},
{
"epoch": 40.73,
"learning_rate": 8.873684210526316e-06,
"loss": 1.4338,
"step": 14380
},
{
"epoch": 40.76,
"learning_rate": 8.857894736842105e-06,
"loss": 1.4284,
"step": 14390
},
{
"epoch": 40.79,
"learning_rate": 8.842105263157893e-06,
"loss": 1.4361,
"step": 14400
},
{
"epoch": 40.82,
"learning_rate": 8.826315789473684e-06,
"loss": 1.4295,
"step": 14410
},
{
"epoch": 40.85,
"learning_rate": 8.810526315789474e-06,
"loss": 1.4303,
"step": 14420
},
{
"epoch": 40.88,
"learning_rate": 8.794736842105264e-06,
"loss": 1.4279,
"step": 14430
},
{
"epoch": 40.9,
"learning_rate": 8.778947368421053e-06,
"loss": 1.4304,
"step": 14440
},
{
"epoch": 40.93,
"learning_rate": 8.763157894736843e-06,
"loss": 1.4327,
"step": 14450
},
{
"epoch": 40.96,
"learning_rate": 8.747368421052632e-06,
"loss": 1.4279,
"step": 14460
},
{
"epoch": 40.99,
"learning_rate": 8.731578947368422e-06,
"loss": 1.4316,
"step": 14470
},
{
"epoch": 41.02,
"learning_rate": 8.715789473684211e-06,
"loss": 1.5542,
"step": 14480
},
{
"epoch": 41.05,
"learning_rate": 8.7e-06,
"loss": 1.4297,
"step": 14490
},
{
"epoch": 41.08,
"learning_rate": 8.68421052631579e-06,
"loss": 1.4296,
"step": 14500
},
{
"epoch": 41.1,
"learning_rate": 8.668421052631579e-06,
"loss": 1.4287,
"step": 14510
},
{
"epoch": 41.13,
"learning_rate": 8.652631578947369e-06,
"loss": 1.4305,
"step": 14520
},
{
"epoch": 41.16,
"learning_rate": 8.636842105263158e-06,
"loss": 1.4293,
"step": 14530
},
{
"epoch": 41.19,
"learning_rate": 8.621052631578948e-06,
"loss": 1.4329,
"step": 14540
},
{
"epoch": 41.22,
"learning_rate": 8.605263157894737e-06,
"loss": 1.4309,
"step": 14550
},
{
"epoch": 41.25,
"learning_rate": 8.589473684210525e-06,
"loss": 1.4331,
"step": 14560
},
{
"epoch": 41.27,
"learning_rate": 8.573684210526316e-06,
"loss": 1.4307,
"step": 14570
},
{
"epoch": 41.3,
"learning_rate": 8.557894736842104e-06,
"loss": 1.432,
"step": 14580
},
{
"epoch": 41.33,
"learning_rate": 8.542105263157896e-06,
"loss": 1.4288,
"step": 14590
},
{
"epoch": 41.36,
"learning_rate": 8.526315789473685e-06,
"loss": 1.4297,
"step": 14600
},
{
"epoch": 41.39,
"learning_rate": 8.510526315789475e-06,
"loss": 1.433,
"step": 14610
},
{
"epoch": 41.42,
"learning_rate": 8.494736842105264e-06,
"loss": 1.4293,
"step": 14620
},
{
"epoch": 41.44,
"learning_rate": 8.478947368421053e-06,
"loss": 1.4293,
"step": 14630
},
{
"epoch": 41.47,
"learning_rate": 8.463157894736843e-06,
"loss": 1.435,
"step": 14640
},
{
"epoch": 41.5,
"learning_rate": 8.447368421052632e-06,
"loss": 1.4266,
"step": 14650
},
{
"epoch": 41.53,
"learning_rate": 8.431578947368422e-06,
"loss": 1.4298,
"step": 14660
},
{
"epoch": 41.56,
"learning_rate": 8.41578947368421e-06,
"loss": 1.4384,
"step": 14670
},
{
"epoch": 41.58,
"learning_rate": 8.400000000000001e-06,
"loss": 1.4358,
"step": 14680
},
{
"epoch": 41.61,
"learning_rate": 8.38421052631579e-06,
"loss": 1.43,
"step": 14690
},
{
"epoch": 41.64,
"learning_rate": 8.368421052631578e-06,
"loss": 1.4327,
"step": 14700
},
{
"epoch": 41.67,
"learning_rate": 8.352631578947369e-06,
"loss": 1.4299,
"step": 14710
},
{
"epoch": 41.7,
"learning_rate": 8.336842105263157e-06,
"loss": 1.4313,
"step": 14720
},
{
"epoch": 41.73,
"learning_rate": 8.321052631578947e-06,
"loss": 1.4278,
"step": 14730
},
{
"epoch": 41.75,
"learning_rate": 8.305263157894736e-06,
"loss": 1.428,
"step": 14740
},
{
"epoch": 41.78,
"learning_rate": 8.289473684210526e-06,
"loss": 1.429,
"step": 14750
},
{
"epoch": 41.81,
"learning_rate": 8.273684210526317e-06,
"loss": 1.4287,
"step": 14760
},
{
"epoch": 41.84,
"learning_rate": 8.257894736842105e-06,
"loss": 1.43,
"step": 14770
},
{
"epoch": 41.87,
"learning_rate": 8.242105263157896e-06,
"loss": 1.4293,
"step": 14780
},
{
"epoch": 41.9,
"learning_rate": 8.226315789473684e-06,
"loss": 1.4286,
"step": 14790
},
{
"epoch": 41.92,
"learning_rate": 8.210526315789475e-06,
"loss": 1.434,
"step": 14800
},
{
"epoch": 41.95,
"learning_rate": 8.194736842105263e-06,
"loss": 1.4282,
"step": 14810
},
{
"epoch": 41.98,
"learning_rate": 8.178947368421054e-06,
"loss": 1.4293,
"step": 14820
},
{
"epoch": 42.01,
"learning_rate": 8.163157894736842e-06,
"loss": 1.555,
"step": 14830
},
{
"epoch": 42.04,
"learning_rate": 8.147368421052631e-06,
"loss": 1.4281,
"step": 14840
},
{
"epoch": 42.07,
"learning_rate": 8.131578947368421e-06,
"loss": 1.4272,
"step": 14850
},
{
"epoch": 42.1,
"learning_rate": 8.11578947368421e-06,
"loss": 1.4316,
"step": 14860
},
{
"epoch": 42.12,
"learning_rate": 8.1e-06,
"loss": 1.4275,
"step": 14870
},
{
"epoch": 42.15,
"learning_rate": 8.084210526315789e-06,
"loss": 1.4261,
"step": 14880
},
{
"epoch": 42.18,
"learning_rate": 8.06842105263158e-06,
"loss": 1.4293,
"step": 14890
},
{
"epoch": 42.21,
"learning_rate": 8.052631578947368e-06,
"loss": 1.4279,
"step": 14900
},
{
"epoch": 42.24,
"learning_rate": 8.036842105263157e-06,
"loss": 1.4273,
"step": 14910
},
{
"epoch": 42.27,
"learning_rate": 8.021052631578949e-06,
"loss": 1.43,
"step": 14920
},
{
"epoch": 42.29,
"learning_rate": 8.005263157894737e-06,
"loss": 1.4286,
"step": 14930
},
{
"epoch": 42.32,
"learning_rate": 7.989473684210528e-06,
"loss": 1.4282,
"step": 14940
},
{
"epoch": 42.35,
"learning_rate": 7.973684210526316e-06,
"loss": 1.4303,
"step": 14950
},
{
"epoch": 42.38,
"learning_rate": 7.957894736842107e-06,
"loss": 1.4268,
"step": 14960
},
{
"epoch": 42.41,
"learning_rate": 7.942105263157895e-06,
"loss": 1.426,
"step": 14970
},
{
"epoch": 42.44,
"learning_rate": 7.926315789473684e-06,
"loss": 1.4271,
"step": 14980
},
{
"epoch": 42.46,
"learning_rate": 7.910526315789474e-06,
"loss": 1.4281,
"step": 14990
},
{
"epoch": 42.49,
"learning_rate": 7.894736842105263e-06,
"loss": 1.4318,
"step": 15000
},
{
"epoch": 42.49,
"eval_denotation_accuracy": 0.5708230307311903,
"eval_loss": 2.1977450847625732,
"eval_runtime": 342.8816,
"eval_samples_per_second": 8.256,
"eval_steps_per_second": 2.065,
"step": 15000
},
{
"epoch": 42.52,
"learning_rate": 7.878947368421053e-06,
"loss": 1.4315,
"step": 15010
},
{
"epoch": 42.55,
"learning_rate": 7.863157894736842e-06,
"loss": 1.4316,
"step": 15020
},
{
"epoch": 42.58,
"learning_rate": 7.847368421052632e-06,
"loss": 1.4299,
"step": 15030
},
{
"epoch": 42.6,
"learning_rate": 7.831578947368421e-06,
"loss": 1.4309,
"step": 15040
},
{
"epoch": 42.63,
"learning_rate": 7.81578947368421e-06,
"loss": 1.4283,
"step": 15050
},
{
"epoch": 42.66,
"learning_rate": 7.8e-06,
"loss": 1.4293,
"step": 15060
},
{
"epoch": 42.69,
"learning_rate": 7.784210526315789e-06,
"loss": 1.4278,
"step": 15070
},
{
"epoch": 42.72,
"learning_rate": 7.768421052631579e-06,
"loss": 1.4293,
"step": 15080
},
{
"epoch": 42.75,
"learning_rate": 7.75263157894737e-06,
"loss": 1.4329,
"step": 15090
},
{
"epoch": 42.77,
"learning_rate": 7.73684210526316e-06,
"loss": 1.4268,
"step": 15100
},
{
"epoch": 42.8,
"learning_rate": 7.721052631578948e-06,
"loss": 1.427,
"step": 15110
},
{
"epoch": 42.83,
"learning_rate": 7.705263157894737e-06,
"loss": 1.4275,
"step": 15120
},
{
"epoch": 42.86,
"learning_rate": 7.689473684210527e-06,
"loss": 1.4275,
"step": 15130
},
{
"epoch": 42.89,
"learning_rate": 7.673684210526316e-06,
"loss": 1.4309,
"step": 15140
},
{
"epoch": 42.92,
"learning_rate": 7.657894736842106e-06,
"loss": 1.4273,
"step": 15150
},
{
"epoch": 42.94,
"learning_rate": 7.642105263157895e-06,
"loss": 1.4312,
"step": 15160
},
{
"epoch": 42.97,
"learning_rate": 7.626315789473685e-06,
"loss": 1.4264,
"step": 15170
},
{
"epoch": 43.0,
"learning_rate": 7.610526315789474e-06,
"loss": 1.5505,
"step": 15180
},
{
"epoch": 43.03,
"learning_rate": 7.594736842105262e-06,
"loss": 1.4282,
"step": 15190
},
{
"epoch": 43.06,
"learning_rate": 7.578947368421053e-06,
"loss": 1.427,
"step": 15200
},
{
"epoch": 43.09,
"learning_rate": 7.563157894736842e-06,
"loss": 1.4284,
"step": 15210
},
{
"epoch": 43.12,
"learning_rate": 7.5473684210526326e-06,
"loss": 1.4278,
"step": 15220
},
{
"epoch": 43.14,
"learning_rate": 7.531578947368421e-06,
"loss": 1.4264,
"step": 15230
},
{
"epoch": 43.17,
"learning_rate": 7.5157894736842115e-06,
"loss": 1.4283,
"step": 15240
},
{
"epoch": 43.2,
"learning_rate": 7.5e-06,
"loss": 1.4262,
"step": 15250
},
{
"epoch": 43.23,
"learning_rate": 7.48421052631579e-06,
"loss": 1.4298,
"step": 15260
},
{
"epoch": 43.26,
"learning_rate": 7.468421052631579e-06,
"loss": 1.4263,
"step": 15270
},
{
"epoch": 43.29,
"learning_rate": 7.452631578947369e-06,
"loss": 1.4272,
"step": 15280
},
{
"epoch": 43.31,
"learning_rate": 7.436842105263157e-06,
"loss": 1.4274,
"step": 15290
},
{
"epoch": 43.34,
"learning_rate": 7.421052631578948e-06,
"loss": 1.429,
"step": 15300
},
{
"epoch": 43.37,
"learning_rate": 7.405263157894737e-06,
"loss": 1.4264,
"step": 15310
},
{
"epoch": 43.4,
"learning_rate": 7.389473684210527e-06,
"loss": 1.4284,
"step": 15320
},
{
"epoch": 43.43,
"learning_rate": 7.373684210526316e-06,
"loss": 1.4274,
"step": 15330
},
{
"epoch": 43.45,
"learning_rate": 7.357894736842106e-06,
"loss": 1.4287,
"step": 15340
},
{
"epoch": 43.48,
"learning_rate": 7.342105263157895e-06,
"loss": 1.4279,
"step": 15350
},
{
"epoch": 43.51,
"learning_rate": 7.326315789473684e-06,
"loss": 1.4313,
"step": 15360
},
{
"epoch": 43.54,
"learning_rate": 7.310526315789473e-06,
"loss": 1.4283,
"step": 15370
},
{
"epoch": 43.57,
"learning_rate": 7.2947368421052636e-06,
"loss": 1.4261,
"step": 15380
},
{
"epoch": 43.6,
"learning_rate": 7.278947368421053e-06,
"loss": 1.4273,
"step": 15390
},
{
"epoch": 43.62,
"learning_rate": 7.2631578947368426e-06,
"loss": 1.4275,
"step": 15400
},
{
"epoch": 43.65,
"learning_rate": 7.247368421052632e-06,
"loss": 1.4278,
"step": 15410
},
{
"epoch": 43.68,
"learning_rate": 7.2315789473684215e-06,
"loss": 1.4265,
"step": 15420
},
{
"epoch": 43.71,
"learning_rate": 7.21578947368421e-06,
"loss": 1.4297,
"step": 15430
},
{
"epoch": 43.74,
"learning_rate": 7.2e-06,
"loss": 1.4275,
"step": 15440
},
{
"epoch": 43.77,
"learning_rate": 7.184210526315789e-06,
"loss": 1.4284,
"step": 15450
},
{
"epoch": 43.79,
"learning_rate": 7.1684210526315795e-06,
"loss": 1.4289,
"step": 15460
},
{
"epoch": 43.82,
"learning_rate": 7.152631578947369e-06,
"loss": 1.4292,
"step": 15470
},
{
"epoch": 43.85,
"learning_rate": 7.1368421052631585e-06,
"loss": 1.4262,
"step": 15480
},
{
"epoch": 43.88,
"learning_rate": 7.121052631578948e-06,
"loss": 1.428,
"step": 15490
},
{
"epoch": 43.91,
"learning_rate": 7.105263157894737e-06,
"loss": 1.4269,
"step": 15500
},
{
"epoch": 43.94,
"learning_rate": 7.089473684210526e-06,
"loss": 1.4283,
"step": 15510
},
{
"epoch": 43.96,
"learning_rate": 7.073684210526316e-06,
"loss": 1.4288,
"step": 15520
},
{
"epoch": 43.99,
"learning_rate": 7.057894736842105e-06,
"loss": 1.4249,
"step": 15530
},
{
"epoch": 44.02,
"learning_rate": 7.042105263157895e-06,
"loss": 1.5519,
"step": 15540
},
{
"epoch": 44.05,
"learning_rate": 7.026315789473685e-06,
"loss": 1.4271,
"step": 15550
},
{
"epoch": 44.08,
"learning_rate": 7.010526315789474e-06,
"loss": 1.4279,
"step": 15560
},
{
"epoch": 44.11,
"learning_rate": 6.994736842105263e-06,
"loss": 1.4269,
"step": 15570
},
{
"epoch": 44.14,
"learning_rate": 6.9789473684210525e-06,
"loss": 1.4299,
"step": 15580
},
{
"epoch": 44.16,
"learning_rate": 6.963157894736842e-06,
"loss": 1.4262,
"step": 15590
},
{
"epoch": 44.19,
"learning_rate": 6.9473684210526315e-06,
"loss": 1.4258,
"step": 15600
},
{
"epoch": 44.22,
"learning_rate": 6.931578947368421e-06,
"loss": 1.4259,
"step": 15610
},
{
"epoch": 44.25,
"learning_rate": 6.9157894736842105e-06,
"loss": 1.4278,
"step": 15620
},
{
"epoch": 44.28,
"learning_rate": 6.900000000000001e-06,
"loss": 1.4249,
"step": 15630
},
{
"epoch": 44.31,
"learning_rate": 6.8842105263157895e-06,
"loss": 1.4281,
"step": 15640
},
{
"epoch": 44.33,
"learning_rate": 6.868421052631579e-06,
"loss": 1.4258,
"step": 15650
},
{
"epoch": 44.36,
"learning_rate": 6.8526315789473685e-06,
"loss": 1.4272,
"step": 15660
},
{
"epoch": 44.39,
"learning_rate": 6.836842105263158e-06,
"loss": 1.4268,
"step": 15670
},
{
"epoch": 44.42,
"learning_rate": 6.8210526315789475e-06,
"loss": 1.4277,
"step": 15680
},
{
"epoch": 44.45,
"learning_rate": 6.805263157894737e-06,
"loss": 1.4272,
"step": 15690
},
{
"epoch": 44.47,
"learning_rate": 6.7894736842105264e-06,
"loss": 1.4277,
"step": 15700
},
{
"epoch": 44.5,
"learning_rate": 6.773684210526317e-06,
"loss": 1.4272,
"step": 15710
},
{
"epoch": 44.53,
"learning_rate": 6.7578947368421054e-06,
"loss": 1.4265,
"step": 15720
},
{
"epoch": 44.56,
"learning_rate": 6.742105263157895e-06,
"loss": 1.4262,
"step": 15730
},
{
"epoch": 44.59,
"learning_rate": 6.726315789473684e-06,
"loss": 1.4259,
"step": 15740
},
{
"epoch": 44.62,
"learning_rate": 6.710526315789474e-06,
"loss": 1.427,
"step": 15750
},
{
"epoch": 44.64,
"learning_rate": 6.694736842105263e-06,
"loss": 1.4266,
"step": 15760
},
{
"epoch": 44.67,
"learning_rate": 6.678947368421053e-06,
"loss": 1.4265,
"step": 15770
},
{
"epoch": 44.7,
"learning_rate": 6.663157894736842e-06,
"loss": 1.4267,
"step": 15780
},
{
"epoch": 44.73,
"learning_rate": 6.647368421052631e-06,
"loss": 1.4263,
"step": 15790
},
{
"epoch": 44.76,
"learning_rate": 6.631578947368421e-06,
"loss": 1.4324,
"step": 15800
},
{
"epoch": 44.79,
"learning_rate": 6.615789473684211e-06,
"loss": 1.4265,
"step": 15810
},
{
"epoch": 44.81,
"learning_rate": 6.6e-06,
"loss": 1.428,
"step": 15820
},
{
"epoch": 44.84,
"learning_rate": 6.58421052631579e-06,
"loss": 1.4252,
"step": 15830
},
{
"epoch": 44.87,
"learning_rate": 6.568421052631579e-06,
"loss": 1.4256,
"step": 15840
},
{
"epoch": 44.9,
"learning_rate": 6.552631578947369e-06,
"loss": 1.4266,
"step": 15850
},
{
"epoch": 44.93,
"learning_rate": 6.5368421052631575e-06,
"loss": 1.4275,
"step": 15860
},
{
"epoch": 44.96,
"learning_rate": 6.521052631578947e-06,
"loss": 1.4265,
"step": 15870
},
{
"epoch": 44.98,
"learning_rate": 6.505263157894737e-06,
"loss": 1.4257,
"step": 15880
},
{
"epoch": 45.01,
"learning_rate": 6.489473684210527e-06,
"loss": 1.5515,
"step": 15890
},
{
"epoch": 45.04,
"learning_rate": 6.473684210526316e-06,
"loss": 1.4271,
"step": 15900
},
{
"epoch": 45.07,
"learning_rate": 6.457894736842106e-06,
"loss": 1.4269,
"step": 15910
},
{
"epoch": 45.1,
"learning_rate": 6.442105263157895e-06,
"loss": 1.4258,
"step": 15920
},
{
"epoch": 45.13,
"learning_rate": 6.426315789473684e-06,
"loss": 1.4271,
"step": 15930
},
{
"epoch": 45.16,
"learning_rate": 6.410526315789473e-06,
"loss": 1.4269,
"step": 15940
},
{
"epoch": 45.18,
"learning_rate": 6.394736842105263e-06,
"loss": 1.4265,
"step": 15950
},
{
"epoch": 45.21,
"learning_rate": 6.378947368421053e-06,
"loss": 1.4269,
"step": 15960
},
{
"epoch": 45.24,
"learning_rate": 6.363157894736843e-06,
"loss": 1.4267,
"step": 15970
},
{
"epoch": 45.27,
"learning_rate": 6.347368421052632e-06,
"loss": 1.4318,
"step": 15980
},
{
"epoch": 45.3,
"learning_rate": 6.331578947368422e-06,
"loss": 1.4256,
"step": 15990
},
{
"epoch": 45.32,
"learning_rate": 6.31578947368421e-06,
"loss": 1.4257,
"step": 16000
},
{
"epoch": 45.32,
"eval_denotation_accuracy": 0.572942423172024,
"eval_loss": 2.181939125061035,
"eval_runtime": 339.4655,
"eval_samples_per_second": 8.34,
"eval_steps_per_second": 2.086,
"step": 16000
},
{
"epoch": 45.35,
"learning_rate": 6.3e-06,
"loss": 1.429,
"step": 16010
},
{
"epoch": 45.38,
"learning_rate": 6.284210526315789e-06,
"loss": 1.4262,
"step": 16020
},
{
"epoch": 45.41,
"learning_rate": 6.268421052631579e-06,
"loss": 1.4312,
"step": 16030
},
{
"epoch": 45.44,
"learning_rate": 6.252631578947368e-06,
"loss": 1.4271,
"step": 16040
},
{
"epoch": 45.47,
"learning_rate": 6.236842105263159e-06,
"loss": 1.427,
"step": 16050
},
{
"epoch": 45.49,
"learning_rate": 6.221052631578948e-06,
"loss": 1.4278,
"step": 16060
},
{
"epoch": 45.52,
"learning_rate": 6.205263157894737e-06,
"loss": 1.4258,
"step": 16070
},
{
"epoch": 45.55,
"learning_rate": 6.189473684210526e-06,
"loss": 1.4253,
"step": 16080
},
{
"epoch": 45.58,
"learning_rate": 6.173684210526316e-06,
"loss": 1.4278,
"step": 16090
},
{
"epoch": 45.61,
"learning_rate": 6.157894736842105e-06,
"loss": 1.4276,
"step": 16100
},
{
"epoch": 45.64,
"learning_rate": 6.142105263157895e-06,
"loss": 1.4278,
"step": 16110
},
{
"epoch": 45.66,
"learning_rate": 6.126315789473684e-06,
"loss": 1.4258,
"step": 16120
},
{
"epoch": 45.69,
"learning_rate": 6.1105263157894746e-06,
"loss": 1.4263,
"step": 16130
},
{
"epoch": 45.72,
"learning_rate": 6.094736842105263e-06,
"loss": 1.4297,
"step": 16140
},
{
"epoch": 45.75,
"learning_rate": 6.078947368421053e-06,
"loss": 1.4296,
"step": 16150
},
{
"epoch": 45.78,
"learning_rate": 6.063157894736842e-06,
"loss": 1.4314,
"step": 16160
},
{
"epoch": 45.81,
"learning_rate": 6.047368421052632e-06,
"loss": 1.4254,
"step": 16170
},
{
"epoch": 45.83,
"learning_rate": 6.031578947368421e-06,
"loss": 1.4265,
"step": 16180
},
{
"epoch": 45.86,
"learning_rate": 6.015789473684211e-06,
"loss": 1.4254,
"step": 16190
},
{
"epoch": 45.89,
"learning_rate": 6e-06,
"loss": 1.4242,
"step": 16200
},
{
"epoch": 45.92,
"learning_rate": 5.98421052631579e-06,
"loss": 1.4248,
"step": 16210
},
{
"epoch": 45.95,
"learning_rate": 5.968421052631579e-06,
"loss": 1.4274,
"step": 16220
},
{
"epoch": 45.97,
"learning_rate": 5.952631578947369e-06,
"loss": 1.428,
"step": 16230
},
{
"epoch": 46.01,
"learning_rate": 5.936842105263158e-06,
"loss": 1.5503,
"step": 16240
},
{
"epoch": 46.03,
"learning_rate": 5.921052631578948e-06,
"loss": 1.4249,
"step": 16250
},
{
"epoch": 46.06,
"learning_rate": 5.905263157894737e-06,
"loss": 1.4247,
"step": 16260
},
{
"epoch": 46.09,
"learning_rate": 5.889473684210527e-06,
"loss": 1.4243,
"step": 16270
},
{
"epoch": 46.12,
"learning_rate": 5.873684210526315e-06,
"loss": 1.4257,
"step": 16280
},
{
"epoch": 46.15,
"learning_rate": 5.857894736842105e-06,
"loss": 1.4288,
"step": 16290
},
{
"epoch": 46.18,
"learning_rate": 5.842105263157895e-06,
"loss": 1.4253,
"step": 16300
},
{
"epoch": 46.2,
"learning_rate": 5.8263157894736846e-06,
"loss": 1.4309,
"step": 16310
},
{
"epoch": 46.23,
"learning_rate": 5.810526315789474e-06,
"loss": 1.4274,
"step": 16320
},
{
"epoch": 46.26,
"learning_rate": 5.7947368421052635e-06,
"loss": 1.4261,
"step": 16330
},
{
"epoch": 46.29,
"learning_rate": 5.778947368421053e-06,
"loss": 1.4261,
"step": 16340
},
{
"epoch": 46.32,
"learning_rate": 5.763157894736842e-06,
"loss": 1.4272,
"step": 16350
},
{
"epoch": 46.34,
"learning_rate": 5.747368421052631e-06,
"loss": 1.4262,
"step": 16360
},
{
"epoch": 46.37,
"learning_rate": 5.731578947368421e-06,
"loss": 1.4265,
"step": 16370
},
{
"epoch": 46.4,
"learning_rate": 5.715789473684211e-06,
"loss": 1.4247,
"step": 16380
},
{
"epoch": 46.43,
"learning_rate": 5.7000000000000005e-06,
"loss": 1.4248,
"step": 16390
},
{
"epoch": 46.46,
"learning_rate": 5.68421052631579e-06,
"loss": 1.4259,
"step": 16400
},
{
"epoch": 46.49,
"learning_rate": 5.6684210526315795e-06,
"loss": 1.4254,
"step": 16410
},
{
"epoch": 46.51,
"learning_rate": 5.652631578947368e-06,
"loss": 1.4268,
"step": 16420
},
{
"epoch": 46.54,
"learning_rate": 5.636842105263158e-06,
"loss": 1.427,
"step": 16430
},
{
"epoch": 46.57,
"learning_rate": 5.621052631578947e-06,
"loss": 1.4264,
"step": 16440
},
{
"epoch": 46.6,
"learning_rate": 5.605263157894737e-06,
"loss": 1.4265,
"step": 16450
},
{
"epoch": 46.63,
"learning_rate": 5.589473684210527e-06,
"loss": 1.4251,
"step": 16460
},
{
"epoch": 46.66,
"learning_rate": 5.573684210526316e-06,
"loss": 1.4258,
"step": 16470
},
{
"epoch": 46.68,
"learning_rate": 5.557894736842106e-06,
"loss": 1.4255,
"step": 16480
},
{
"epoch": 46.71,
"learning_rate": 5.542105263157895e-06,
"loss": 1.4271,
"step": 16490
},
{
"epoch": 46.74,
"learning_rate": 5.526315789473684e-06,
"loss": 1.4261,
"step": 16500
},
{
"epoch": 46.77,
"learning_rate": 5.5105263157894735e-06,
"loss": 1.4262,
"step": 16510
},
{
"epoch": 46.8,
"learning_rate": 5.494736842105263e-06,
"loss": 1.4251,
"step": 16520
},
{
"epoch": 46.83,
"learning_rate": 5.4789473684210525e-06,
"loss": 1.4256,
"step": 16530
},
{
"epoch": 46.85,
"learning_rate": 5.463157894736842e-06,
"loss": 1.425,
"step": 16540
},
{
"epoch": 46.88,
"learning_rate": 5.447368421052632e-06,
"loss": 1.4269,
"step": 16550
},
{
"epoch": 46.91,
"learning_rate": 5.431578947368422e-06,
"loss": 1.4257,
"step": 16560
},
{
"epoch": 46.94,
"learning_rate": 5.4157894736842105e-06,
"loss": 1.4249,
"step": 16570
},
{
"epoch": 46.97,
"learning_rate": 5.4e-06,
"loss": 1.4279,
"step": 16580
},
{
"epoch": 46.99,
"learning_rate": 5.3842105263157895e-06,
"loss": 1.4262,
"step": 16590
},
{
"epoch": 47.03,
"learning_rate": 5.368421052631579e-06,
"loss": 1.5532,
"step": 16600
},
{
"epoch": 47.05,
"learning_rate": 5.3526315789473684e-06,
"loss": 1.4252,
"step": 16610
},
{
"epoch": 47.08,
"learning_rate": 5.336842105263158e-06,
"loss": 1.4255,
"step": 16620
},
{
"epoch": 47.11,
"learning_rate": 5.321052631578948e-06,
"loss": 1.4251,
"step": 16630
},
{
"epoch": 47.14,
"learning_rate": 5.305263157894737e-06,
"loss": 1.4238,
"step": 16640
},
{
"epoch": 47.17,
"learning_rate": 5.289473684210526e-06,
"loss": 1.4243,
"step": 16650
},
{
"epoch": 47.19,
"learning_rate": 5.273684210526316e-06,
"loss": 1.4262,
"step": 16660
},
{
"epoch": 47.22,
"learning_rate": 5.257894736842105e-06,
"loss": 1.4283,
"step": 16670
},
{
"epoch": 47.25,
"learning_rate": 5.242105263157895e-06,
"loss": 1.425,
"step": 16680
},
{
"epoch": 47.28,
"learning_rate": 5.226315789473684e-06,
"loss": 1.4269,
"step": 16690
},
{
"epoch": 47.31,
"learning_rate": 5.210526315789474e-06,
"loss": 1.4249,
"step": 16700
},
{
"epoch": 47.34,
"learning_rate": 5.194736842105263e-06,
"loss": 1.4254,
"step": 16710
},
{
"epoch": 47.36,
"learning_rate": 5.178947368421053e-06,
"loss": 1.4247,
"step": 16720
},
{
"epoch": 47.39,
"learning_rate": 5.163157894736842e-06,
"loss": 1.4337,
"step": 16730
},
{
"epoch": 47.42,
"learning_rate": 5.147368421052632e-06,
"loss": 1.4268,
"step": 16740
},
{
"epoch": 47.45,
"learning_rate": 5.131578947368421e-06,
"loss": 1.4242,
"step": 16750
},
{
"epoch": 47.48,
"learning_rate": 5.115789473684211e-06,
"loss": 1.4241,
"step": 16760
},
{
"epoch": 47.51,
"learning_rate": 5.1e-06,
"loss": 1.4259,
"step": 16770
},
{
"epoch": 47.53,
"learning_rate": 5.084210526315789e-06,
"loss": 1.4265,
"step": 16780
},
{
"epoch": 47.56,
"learning_rate": 5.0684210526315784e-06,
"loss": 1.4268,
"step": 16790
},
{
"epoch": 47.59,
"learning_rate": 5.052631578947369e-06,
"loss": 1.4248,
"step": 16800
},
{
"epoch": 47.62,
"learning_rate": 5.036842105263158e-06,
"loss": 1.4245,
"step": 16810
},
{
"epoch": 47.65,
"learning_rate": 5.021052631578948e-06,
"loss": 1.4267,
"step": 16820
},
{
"epoch": 47.68,
"learning_rate": 5.005263157894737e-06,
"loss": 1.4252,
"step": 16830
},
{
"epoch": 47.7,
"learning_rate": 4.989473684210527e-06,
"loss": 1.4241,
"step": 16840
},
{
"epoch": 47.73,
"learning_rate": 4.973684210526315e-06,
"loss": 1.4243,
"step": 16850
},
{
"epoch": 47.76,
"learning_rate": 4.957894736842105e-06,
"loss": 1.4271,
"step": 16860
},
{
"epoch": 47.79,
"learning_rate": 4.942105263157894e-06,
"loss": 1.4239,
"step": 16870
},
{
"epoch": 47.82,
"learning_rate": 4.926315789473685e-06,
"loss": 1.4292,
"step": 16880
},
{
"epoch": 47.84,
"learning_rate": 4.910526315789474e-06,
"loss": 1.4252,
"step": 16890
},
{
"epoch": 47.87,
"learning_rate": 4.894736842105264e-06,
"loss": 1.4254,
"step": 16900
},
{
"epoch": 47.9,
"learning_rate": 4.878947368421053e-06,
"loss": 1.4261,
"step": 16910
},
{
"epoch": 47.93,
"learning_rate": 4.863157894736842e-06,
"loss": 1.4252,
"step": 16920
},
{
"epoch": 47.96,
"learning_rate": 4.847368421052631e-06,
"loss": 1.4256,
"step": 16930
},
{
"epoch": 47.99,
"learning_rate": 4.831578947368421e-06,
"loss": 1.4258,
"step": 16940
},
{
"epoch": 48.02,
"learning_rate": 4.81578947368421e-06,
"loss": 1.5516,
"step": 16950
},
{
"epoch": 48.05,
"learning_rate": 4.800000000000001e-06,
"loss": 1.4234,
"step": 16960
},
{
"epoch": 48.07,
"learning_rate": 4.78421052631579e-06,
"loss": 1.4246,
"step": 16970
},
{
"epoch": 48.1,
"learning_rate": 4.76842105263158e-06,
"loss": 1.4253,
"step": 16980
},
{
"epoch": 48.13,
"learning_rate": 4.752631578947368e-06,
"loss": 1.4241,
"step": 16990
},
{
"epoch": 48.16,
"learning_rate": 4.736842105263158e-06,
"loss": 1.4254,
"step": 17000
},
{
"epoch": 48.16,
"eval_denotation_accuracy": 0.5757682797598022,
"eval_loss": 2.200056314468384,
"eval_runtime": 339.9367,
"eval_samples_per_second": 8.328,
"eval_steps_per_second": 2.083,
"step": 17000
},
{
"epoch": 48.19,
"learning_rate": 4.721052631578947e-06,
"loss": 1.4268,
"step": 17010
},
{
"epoch": 48.21,
"learning_rate": 4.705263157894737e-06,
"loss": 1.4245,
"step": 17020
},
{
"epoch": 48.24,
"learning_rate": 4.689473684210526e-06,
"loss": 1.4318,
"step": 17030
},
{
"epoch": 48.27,
"learning_rate": 4.673684210526316e-06,
"loss": 1.4239,
"step": 17040
},
{
"epoch": 48.3,
"learning_rate": 4.657894736842106e-06,
"loss": 1.4245,
"step": 17050
},
{
"epoch": 48.33,
"learning_rate": 4.642105263157895e-06,
"loss": 1.4254,
"step": 17060
},
{
"epoch": 48.36,
"learning_rate": 4.626315789473684e-06,
"loss": 1.4241,
"step": 17070
},
{
"epoch": 48.38,
"learning_rate": 4.610526315789474e-06,
"loss": 1.4248,
"step": 17080
},
{
"epoch": 48.41,
"learning_rate": 4.594736842105263e-06,
"loss": 1.4248,
"step": 17090
},
{
"epoch": 48.44,
"learning_rate": 4.578947368421053e-06,
"loss": 1.4245,
"step": 17100
},
{
"epoch": 48.47,
"learning_rate": 4.563157894736842e-06,
"loss": 1.4241,
"step": 17110
},
{
"epoch": 48.5,
"learning_rate": 4.547368421052632e-06,
"loss": 1.4271,
"step": 17120
},
{
"epoch": 48.53,
"learning_rate": 4.531578947368421e-06,
"loss": 1.4247,
"step": 17130
},
{
"epoch": 48.55,
"learning_rate": 4.515789473684211e-06,
"loss": 1.4268,
"step": 17140
},
{
"epoch": 48.58,
"learning_rate": 4.5e-06,
"loss": 1.4245,
"step": 17150
},
{
"epoch": 48.61,
"learning_rate": 4.48421052631579e-06,
"loss": 1.4276,
"step": 17160
},
{
"epoch": 48.64,
"learning_rate": 4.468421052631579e-06,
"loss": 1.4245,
"step": 17170
},
{
"epoch": 48.67,
"learning_rate": 4.452631578947369e-06,
"loss": 1.4257,
"step": 17180
},
{
"epoch": 48.7,
"learning_rate": 4.436842105263158e-06,
"loss": 1.4254,
"step": 17190
},
{
"epoch": 48.72,
"learning_rate": 4.421052631578947e-06,
"loss": 1.4259,
"step": 17200
},
{
"epoch": 48.75,
"learning_rate": 4.405263157894737e-06,
"loss": 1.4278,
"step": 17210
},
{
"epoch": 48.78,
"learning_rate": 4.3894736842105266e-06,
"loss": 1.4242,
"step": 17220
},
{
"epoch": 48.81,
"learning_rate": 4.373684210526316e-06,
"loss": 1.4246,
"step": 17230
},
{
"epoch": 48.84,
"learning_rate": 4.3578947368421055e-06,
"loss": 1.4262,
"step": 17240
},
{
"epoch": 48.86,
"learning_rate": 4.342105263157895e-06,
"loss": 1.4243,
"step": 17250
},
{
"epoch": 48.89,
"learning_rate": 4.3263157894736845e-06,
"loss": 1.4249,
"step": 17260
},
{
"epoch": 48.92,
"learning_rate": 4.310526315789474e-06,
"loss": 1.4274,
"step": 17270
},
{
"epoch": 48.95,
"learning_rate": 4.294736842105263e-06,
"loss": 1.4233,
"step": 17280
},
{
"epoch": 48.98,
"learning_rate": 4.278947368421052e-06,
"loss": 1.4248,
"step": 17290
},
{
"epoch": 49.01,
"learning_rate": 4.2631578947368425e-06,
"loss": 1.5487,
"step": 17300
},
{
"epoch": 49.04,
"learning_rate": 4.247368421052632e-06,
"loss": 1.4247,
"step": 17310
},
{
"epoch": 49.06,
"learning_rate": 4.2315789473684215e-06,
"loss": 1.4237,
"step": 17320
},
{
"epoch": 49.09,
"learning_rate": 4.215789473684211e-06,
"loss": 1.4244,
"step": 17330
},
{
"epoch": 49.12,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.4264,
"step": 17340
},
{
"epoch": 49.15,
"learning_rate": 4.184210526315789e-06,
"loss": 1.4297,
"step": 17350
},
{
"epoch": 49.18,
"learning_rate": 4.168421052631579e-06,
"loss": 1.4262,
"step": 17360
},
{
"epoch": 49.21,
"learning_rate": 4.152631578947368e-06,
"loss": 1.4243,
"step": 17370
},
{
"epoch": 49.23,
"learning_rate": 4.136842105263158e-06,
"loss": 1.426,
"step": 17380
},
{
"epoch": 49.26,
"learning_rate": 4.121052631578948e-06,
"loss": 1.4252,
"step": 17390
},
{
"epoch": 49.29,
"learning_rate": 4.105263157894737e-06,
"loss": 1.4254,
"step": 17400
},
{
"epoch": 49.32,
"learning_rate": 4.089473684210527e-06,
"loss": 1.4257,
"step": 17410
},
{
"epoch": 49.35,
"learning_rate": 4.0736842105263155e-06,
"loss": 1.424,
"step": 17420
},
{
"epoch": 49.38,
"learning_rate": 4.057894736842105e-06,
"loss": 1.4258,
"step": 17430
},
{
"epoch": 49.4,
"learning_rate": 4.0421052631578945e-06,
"loss": 1.4247,
"step": 17440
},
{
"epoch": 49.43,
"learning_rate": 4.026315789473684e-06,
"loss": 1.424,
"step": 17450
},
{
"epoch": 49.46,
"learning_rate": 4.010526315789474e-06,
"loss": 1.4246,
"step": 17460
},
{
"epoch": 49.49,
"learning_rate": 3.994736842105264e-06,
"loss": 1.4247,
"step": 17470
},
{
"epoch": 49.52,
"learning_rate": 3.978947368421053e-06,
"loss": 1.4225,
"step": 17480
},
{
"epoch": 49.55,
"learning_rate": 3.963157894736842e-06,
"loss": 1.4285,
"step": 17490
},
{
"epoch": 49.57,
"learning_rate": 3.9473684210526315e-06,
"loss": 1.4258,
"step": 17500
},
{
"epoch": 49.6,
"learning_rate": 3.931578947368421e-06,
"loss": 1.4241,
"step": 17510
},
{
"epoch": 49.63,
"learning_rate": 3.9157894736842104e-06,
"loss": 1.4243,
"step": 17520
},
{
"epoch": 49.66,
"learning_rate": 3.9e-06,
"loss": 1.4268,
"step": 17530
},
{
"epoch": 49.69,
"learning_rate": 3.884210526315789e-06,
"loss": 1.4265,
"step": 17540
},
{
"epoch": 49.71,
"learning_rate": 3.86842105263158e-06,
"loss": 1.4227,
"step": 17550
},
{
"epoch": 49.74,
"learning_rate": 3.852631578947368e-06,
"loss": 1.426,
"step": 17560
},
{
"epoch": 49.77,
"learning_rate": 3.836842105263158e-06,
"loss": 1.4237,
"step": 17570
},
{
"epoch": 49.8,
"learning_rate": 3.821052631578947e-06,
"loss": 1.4235,
"step": 17580
},
{
"epoch": 49.83,
"learning_rate": 3.805263157894737e-06,
"loss": 1.4249,
"step": 17590
},
{
"epoch": 49.86,
"learning_rate": 3.7894736842105264e-06,
"loss": 1.4235,
"step": 17600
},
{
"epoch": 49.88,
"learning_rate": 3.7736842105263163e-06,
"loss": 1.4244,
"step": 17610
},
{
"epoch": 49.91,
"learning_rate": 3.7578947368421058e-06,
"loss": 1.4285,
"step": 17620
},
{
"epoch": 49.94,
"learning_rate": 3.742105263157895e-06,
"loss": 1.4255,
"step": 17630
},
{
"epoch": 49.97,
"learning_rate": 3.7263157894736843e-06,
"loss": 1.425,
"step": 17640
},
{
"epoch": 50.0,
"learning_rate": 3.710526315789474e-06,
"loss": 1.425,
"step": 17650
},
{
"epoch": 50.03,
"learning_rate": 3.6947368421052633e-06,
"loss": 1.5484,
"step": 17660
},
{
"epoch": 50.06,
"learning_rate": 3.678947368421053e-06,
"loss": 1.4246,
"step": 17670
},
{
"epoch": 50.08,
"learning_rate": 3.663157894736842e-06,
"loss": 1.4261,
"step": 17680
},
{
"epoch": 50.11,
"learning_rate": 3.6473684210526318e-06,
"loss": 1.4246,
"step": 17690
},
{
"epoch": 50.14,
"learning_rate": 3.6315789473684213e-06,
"loss": 1.4268,
"step": 17700
},
{
"epoch": 50.17,
"learning_rate": 3.6157894736842108e-06,
"loss": 1.4249,
"step": 17710
},
{
"epoch": 50.2,
"learning_rate": 3.6e-06,
"loss": 1.4249,
"step": 17720
},
{
"epoch": 50.23,
"learning_rate": 3.5842105263157898e-06,
"loss": 1.4245,
"step": 17730
},
{
"epoch": 50.25,
"learning_rate": 3.5684210526315792e-06,
"loss": 1.4253,
"step": 17740
},
{
"epoch": 50.28,
"learning_rate": 3.5526315789473683e-06,
"loss": 1.426,
"step": 17750
},
{
"epoch": 50.31,
"learning_rate": 3.536842105263158e-06,
"loss": 1.4276,
"step": 17760
},
{
"epoch": 50.34,
"learning_rate": 3.5210526315789473e-06,
"loss": 1.4233,
"step": 17770
},
{
"epoch": 50.37,
"learning_rate": 3.505263157894737e-06,
"loss": 1.4239,
"step": 17780
},
{
"epoch": 50.4,
"learning_rate": 3.4894736842105263e-06,
"loss": 1.4237,
"step": 17790
},
{
"epoch": 50.42,
"learning_rate": 3.4736842105263158e-06,
"loss": 1.4244,
"step": 17800
},
{
"epoch": 50.45,
"learning_rate": 3.4578947368421053e-06,
"loss": 1.4239,
"step": 17810
},
{
"epoch": 50.48,
"learning_rate": 3.4421052631578947e-06,
"loss": 1.4247,
"step": 17820
},
{
"epoch": 50.51,
"learning_rate": 3.4263157894736842e-06,
"loss": 1.4261,
"step": 17830
},
{
"epoch": 50.54,
"learning_rate": 3.4105263157894737e-06,
"loss": 1.4238,
"step": 17840
},
{
"epoch": 50.57,
"learning_rate": 3.3947368421052632e-06,
"loss": 1.4244,
"step": 17850
},
{
"epoch": 50.59,
"learning_rate": 3.3789473684210527e-06,
"loss": 1.4234,
"step": 17860
},
{
"epoch": 50.62,
"learning_rate": 3.363157894736842e-06,
"loss": 1.424,
"step": 17870
},
{
"epoch": 50.65,
"learning_rate": 3.3473684210526317e-06,
"loss": 1.4319,
"step": 17880
},
{
"epoch": 50.68,
"learning_rate": 3.331578947368421e-06,
"loss": 1.4242,
"step": 17890
},
{
"epoch": 50.71,
"learning_rate": 3.3157894736842107e-06,
"loss": 1.4245,
"step": 17900
},
{
"epoch": 50.73,
"learning_rate": 3.3e-06,
"loss": 1.4315,
"step": 17910
},
{
"epoch": 50.76,
"learning_rate": 3.2842105263157897e-06,
"loss": 1.427,
"step": 17920
},
{
"epoch": 50.79,
"learning_rate": 3.2684210526315787e-06,
"loss": 1.4243,
"step": 17930
},
{
"epoch": 50.82,
"learning_rate": 3.2526315789473686e-06,
"loss": 1.4257,
"step": 17940
},
{
"epoch": 50.85,
"learning_rate": 3.236842105263158e-06,
"loss": 1.4236,
"step": 17950
},
{
"epoch": 50.88,
"learning_rate": 3.2210526315789476e-06,
"loss": 1.4241,
"step": 17960
},
{
"epoch": 50.9,
"learning_rate": 3.2052631578947367e-06,
"loss": 1.424,
"step": 17970
},
{
"epoch": 50.93,
"learning_rate": 3.1894736842105266e-06,
"loss": 1.4244,
"step": 17980
},
{
"epoch": 50.96,
"learning_rate": 3.173684210526316e-06,
"loss": 1.4231,
"step": 17990
},
{
"epoch": 50.99,
"learning_rate": 3.157894736842105e-06,
"loss": 1.4248,
"step": 18000
},
{
"epoch": 50.99,
"eval_denotation_accuracy": 0.5817732250088308,
"eval_loss": 2.1932122707366943,
"eval_runtime": 338.4738,
"eval_samples_per_second": 8.364,
"eval_steps_per_second": 2.092,
"step": 18000
},
{
"epoch": 51.02,
"learning_rate": 3.1421052631578947e-06,
"loss": 1.548,
"step": 18010
},
{
"epoch": 51.05,
"learning_rate": 3.126315789473684e-06,
"loss": 1.4236,
"step": 18020
},
{
"epoch": 51.08,
"learning_rate": 3.110526315789474e-06,
"loss": 1.423,
"step": 18030
},
{
"epoch": 51.1,
"learning_rate": 3.094736842105263e-06,
"loss": 1.4243,
"step": 18040
},
{
"epoch": 51.13,
"learning_rate": 3.0789473684210526e-06,
"loss": 1.432,
"step": 18050
},
{
"epoch": 51.16,
"learning_rate": 3.063157894736842e-06,
"loss": 1.4236,
"step": 18060
},
{
"epoch": 51.19,
"learning_rate": 3.0473684210526316e-06,
"loss": 1.4244,
"step": 18070
},
{
"epoch": 51.22,
"learning_rate": 3.031578947368421e-06,
"loss": 1.4245,
"step": 18080
},
{
"epoch": 51.25,
"learning_rate": 3.0157894736842106e-06,
"loss": 1.4227,
"step": 18090
},
{
"epoch": 51.27,
"learning_rate": 3e-06,
"loss": 1.4227,
"step": 18100
},
{
"epoch": 51.3,
"learning_rate": 2.9842105263157896e-06,
"loss": 1.4233,
"step": 18110
},
{
"epoch": 51.33,
"learning_rate": 2.968421052631579e-06,
"loss": 1.4241,
"step": 18120
},
{
"epoch": 51.36,
"learning_rate": 2.9526315789473685e-06,
"loss": 1.4239,
"step": 18130
},
{
"epoch": 51.39,
"learning_rate": 2.9368421052631576e-06,
"loss": 1.424,
"step": 18140
},
{
"epoch": 51.42,
"learning_rate": 2.9210526315789475e-06,
"loss": 1.4234,
"step": 18150
},
{
"epoch": 51.44,
"learning_rate": 2.905263157894737e-06,
"loss": 1.4247,
"step": 18160
},
{
"epoch": 51.47,
"learning_rate": 2.8894736842105265e-06,
"loss": 1.4223,
"step": 18170
},
{
"epoch": 51.5,
"learning_rate": 2.8736842105263156e-06,
"loss": 1.4246,
"step": 18180
},
{
"epoch": 51.53,
"learning_rate": 2.8578947368421055e-06,
"loss": 1.424,
"step": 18190
},
{
"epoch": 51.56,
"learning_rate": 2.842105263157895e-06,
"loss": 1.4245,
"step": 18200
},
{
"epoch": 51.58,
"learning_rate": 2.826315789473684e-06,
"loss": 1.4237,
"step": 18210
},
{
"epoch": 51.61,
"learning_rate": 2.8105263157894735e-06,
"loss": 1.4234,
"step": 18220
},
{
"epoch": 51.64,
"learning_rate": 2.7947368421052635e-06,
"loss": 1.4231,
"step": 18230
},
{
"epoch": 51.67,
"learning_rate": 2.778947368421053e-06,
"loss": 1.4237,
"step": 18240
},
{
"epoch": 51.7,
"learning_rate": 2.763157894736842e-06,
"loss": 1.425,
"step": 18250
},
{
"epoch": 51.73,
"learning_rate": 2.7473684210526315e-06,
"loss": 1.4223,
"step": 18260
},
{
"epoch": 51.75,
"learning_rate": 2.731578947368421e-06,
"loss": 1.4241,
"step": 18270
},
{
"epoch": 51.78,
"learning_rate": 2.715789473684211e-06,
"loss": 1.4231,
"step": 18280
},
{
"epoch": 51.81,
"learning_rate": 2.7e-06,
"loss": 1.4242,
"step": 18290
},
{
"epoch": 51.84,
"learning_rate": 2.6842105263157895e-06,
"loss": 1.4239,
"step": 18300
},
{
"epoch": 51.87,
"learning_rate": 2.668421052631579e-06,
"loss": 1.4231,
"step": 18310
},
{
"epoch": 51.9,
"learning_rate": 2.6526315789473685e-06,
"loss": 1.4238,
"step": 18320
},
{
"epoch": 51.92,
"learning_rate": 2.636842105263158e-06,
"loss": 1.4236,
"step": 18330
},
{
"epoch": 51.95,
"learning_rate": 2.6210526315789474e-06,
"loss": 1.4251,
"step": 18340
},
{
"epoch": 51.98,
"learning_rate": 2.605263157894737e-06,
"loss": 1.4227,
"step": 18350
},
{
"epoch": 52.01,
"learning_rate": 2.5894736842105264e-06,
"loss": 1.5467,
"step": 18360
},
{
"epoch": 52.04,
"learning_rate": 2.573684210526316e-06,
"loss": 1.4229,
"step": 18370
},
{
"epoch": 52.07,
"learning_rate": 2.5578947368421054e-06,
"loss": 1.4236,
"step": 18380
},
{
"epoch": 52.1,
"learning_rate": 2.5421052631578945e-06,
"loss": 1.4228,
"step": 18390
},
{
"epoch": 52.12,
"learning_rate": 2.5263157894736844e-06,
"loss": 1.4229,
"step": 18400
},
{
"epoch": 52.15,
"learning_rate": 2.510526315789474e-06,
"loss": 1.4234,
"step": 18410
},
{
"epoch": 52.18,
"learning_rate": 2.4947368421052634e-06,
"loss": 1.4245,
"step": 18420
},
{
"epoch": 52.21,
"learning_rate": 2.4789473684210524e-06,
"loss": 1.4238,
"step": 18430
},
{
"epoch": 52.24,
"learning_rate": 2.4631578947368424e-06,
"loss": 1.4229,
"step": 18440
},
{
"epoch": 52.27,
"learning_rate": 2.447368421052632e-06,
"loss": 1.4236,
"step": 18450
},
{
"epoch": 52.29,
"learning_rate": 2.431578947368421e-06,
"loss": 1.4233,
"step": 18460
},
{
"epoch": 52.32,
"learning_rate": 2.4157894736842104e-06,
"loss": 1.4237,
"step": 18470
},
{
"epoch": 52.35,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.4244,
"step": 18480
},
{
"epoch": 52.38,
"learning_rate": 2.38421052631579e-06,
"loss": 1.4237,
"step": 18490
},
{
"epoch": 52.41,
"learning_rate": 2.368421052631579e-06,
"loss": 1.4231,
"step": 18500
},
{
"epoch": 52.44,
"learning_rate": 2.3526315789473684e-06,
"loss": 1.4243,
"step": 18510
},
{
"epoch": 52.46,
"learning_rate": 2.336842105263158e-06,
"loss": 1.4236,
"step": 18520
},
{
"epoch": 52.49,
"learning_rate": 2.3210526315789473e-06,
"loss": 1.4243,
"step": 18530
},
{
"epoch": 52.52,
"learning_rate": 2.305263157894737e-06,
"loss": 1.4224,
"step": 18540
},
{
"epoch": 52.55,
"learning_rate": 2.2894736842105263e-06,
"loss": 1.4223,
"step": 18550
},
{
"epoch": 52.58,
"learning_rate": 2.273684210526316e-06,
"loss": 1.4245,
"step": 18560
},
{
"epoch": 52.6,
"learning_rate": 2.2578947368421053e-06,
"loss": 1.4243,
"step": 18570
},
{
"epoch": 52.63,
"learning_rate": 2.242105263157895e-06,
"loss": 1.4226,
"step": 18580
},
{
"epoch": 52.66,
"learning_rate": 2.2263157894736843e-06,
"loss": 1.4248,
"step": 18590
},
{
"epoch": 52.69,
"learning_rate": 2.2105263157894734e-06,
"loss": 1.4232,
"step": 18600
},
{
"epoch": 52.72,
"learning_rate": 2.1947368421052633e-06,
"loss": 1.4236,
"step": 18610
},
{
"epoch": 52.75,
"learning_rate": 2.1789473684210528e-06,
"loss": 1.4237,
"step": 18620
},
{
"epoch": 52.77,
"learning_rate": 2.1631578947368423e-06,
"loss": 1.4223,
"step": 18630
},
{
"epoch": 52.8,
"learning_rate": 2.1473684210526313e-06,
"loss": 1.4226,
"step": 18640
},
{
"epoch": 52.83,
"learning_rate": 2.1315789473684212e-06,
"loss": 1.4217,
"step": 18650
},
{
"epoch": 52.86,
"learning_rate": 2.1157894736842107e-06,
"loss": 1.4244,
"step": 18660
},
{
"epoch": 52.89,
"learning_rate": 2.1000000000000002e-06,
"loss": 1.4257,
"step": 18670
},
{
"epoch": 52.92,
"learning_rate": 2.0842105263157893e-06,
"loss": 1.4224,
"step": 18680
},
{
"epoch": 52.94,
"learning_rate": 2.068421052631579e-06,
"loss": 1.4231,
"step": 18690
},
{
"epoch": 52.97,
"learning_rate": 2.0526315789473687e-06,
"loss": 1.423,
"step": 18700
},
{
"epoch": 53.0,
"learning_rate": 2.0368421052631578e-06,
"loss": 1.5492,
"step": 18710
},
{
"epoch": 53.03,
"learning_rate": 2.0210526315789473e-06,
"loss": 1.4238,
"step": 18720
},
{
"epoch": 53.06,
"learning_rate": 2.005263157894737e-06,
"loss": 1.4227,
"step": 18730
},
{
"epoch": 53.09,
"learning_rate": 1.9894736842105267e-06,
"loss": 1.4226,
"step": 18740
},
{
"epoch": 53.12,
"learning_rate": 1.9736842105263157e-06,
"loss": 1.4235,
"step": 18750
},
{
"epoch": 53.14,
"learning_rate": 1.9578947368421052e-06,
"loss": 1.4218,
"step": 18760
},
{
"epoch": 53.17,
"learning_rate": 1.9421052631578947e-06,
"loss": 1.4228,
"step": 18770
},
{
"epoch": 53.2,
"learning_rate": 1.926315789473684e-06,
"loss": 1.4231,
"step": 18780
},
{
"epoch": 53.23,
"learning_rate": 1.9105263157894737e-06,
"loss": 1.422,
"step": 18790
},
{
"epoch": 53.26,
"learning_rate": 1.8947368421052632e-06,
"loss": 1.4231,
"step": 18800
},
{
"epoch": 53.29,
"learning_rate": 1.8789473684210529e-06,
"loss": 1.4239,
"step": 18810
},
{
"epoch": 53.31,
"learning_rate": 1.8631578947368422e-06,
"loss": 1.423,
"step": 18820
},
{
"epoch": 53.34,
"learning_rate": 1.8473684210526317e-06,
"loss": 1.4238,
"step": 18830
},
{
"epoch": 53.37,
"learning_rate": 1.831578947368421e-06,
"loss": 1.4233,
"step": 18840
},
{
"epoch": 53.4,
"learning_rate": 1.8157894736842106e-06,
"loss": 1.424,
"step": 18850
},
{
"epoch": 53.43,
"learning_rate": 1.8e-06,
"loss": 1.4224,
"step": 18860
},
{
"epoch": 53.45,
"learning_rate": 1.7842105263157896e-06,
"loss": 1.4238,
"step": 18870
},
{
"epoch": 53.48,
"learning_rate": 1.768421052631579e-06,
"loss": 1.4226,
"step": 18880
},
{
"epoch": 53.51,
"learning_rate": 1.7526315789473686e-06,
"loss": 1.4226,
"step": 18890
},
{
"epoch": 53.54,
"learning_rate": 1.7368421052631579e-06,
"loss": 1.4234,
"step": 18900
},
{
"epoch": 53.57,
"learning_rate": 1.7210526315789474e-06,
"loss": 1.4231,
"step": 18910
},
{
"epoch": 53.6,
"learning_rate": 1.7052631578947369e-06,
"loss": 1.4234,
"step": 18920
},
{
"epoch": 53.62,
"learning_rate": 1.6894736842105264e-06,
"loss": 1.4227,
"step": 18930
},
{
"epoch": 53.65,
"learning_rate": 1.6736842105263158e-06,
"loss": 1.4225,
"step": 18940
},
{
"epoch": 53.68,
"learning_rate": 1.6578947368421053e-06,
"loss": 1.4225,
"step": 18950
},
{
"epoch": 53.71,
"learning_rate": 1.6421052631578948e-06,
"loss": 1.4233,
"step": 18960
},
{
"epoch": 53.74,
"learning_rate": 1.6263157894736843e-06,
"loss": 1.4247,
"step": 18970
},
{
"epoch": 53.77,
"learning_rate": 1.6105263157894738e-06,
"loss": 1.4239,
"step": 18980
},
{
"epoch": 53.79,
"learning_rate": 1.5947368421052633e-06,
"loss": 1.428,
"step": 18990
},
{
"epoch": 53.82,
"learning_rate": 1.5789473684210526e-06,
"loss": 1.423,
"step": 19000
},
{
"epoch": 53.82,
"eval_denotation_accuracy": 0.5782409042741081,
"eval_loss": 2.194387435913086,
"eval_runtime": 320.0346,
"eval_samples_per_second": 8.846,
"eval_steps_per_second": 2.212,
"step": 19000
},
{
"epoch": 53.85,
"learning_rate": 1.563157894736842e-06,
"loss": 1.4241,
"step": 19010
},
{
"epoch": 53.88,
"learning_rate": 1.5473684210526316e-06,
"loss": 1.4229,
"step": 19020
},
{
"epoch": 53.91,
"learning_rate": 1.531578947368421e-06,
"loss": 1.4278,
"step": 19030
},
{
"epoch": 53.94,
"learning_rate": 1.5157894736842105e-06,
"loss": 1.4226,
"step": 19040
},
{
"epoch": 53.96,
"learning_rate": 1.5e-06,
"loss": 1.4231,
"step": 19050
},
{
"epoch": 53.99,
"learning_rate": 1.4842105263157895e-06,
"loss": 1.4231,
"step": 19060
},
{
"epoch": 54.02,
"learning_rate": 1.4684210526315788e-06,
"loss": 1.548,
"step": 19070
},
{
"epoch": 54.05,
"learning_rate": 1.4526315789473685e-06,
"loss": 1.4215,
"step": 19080
},
{
"epoch": 54.08,
"learning_rate": 1.4368421052631578e-06,
"loss": 1.4229,
"step": 19090
},
{
"epoch": 54.11,
"learning_rate": 1.4210526315789475e-06,
"loss": 1.4234,
"step": 19100
},
{
"epoch": 54.14,
"learning_rate": 1.4052631578947368e-06,
"loss": 1.4227,
"step": 19110
},
{
"epoch": 54.16,
"learning_rate": 1.3894736842105265e-06,
"loss": 1.423,
"step": 19120
},
{
"epoch": 54.19,
"learning_rate": 1.3736842105263158e-06,
"loss": 1.4233,
"step": 19130
},
{
"epoch": 54.22,
"learning_rate": 1.3578947368421055e-06,
"loss": 1.4235,
"step": 19140
},
{
"epoch": 54.25,
"learning_rate": 1.3421052631578947e-06,
"loss": 1.4223,
"step": 19150
},
{
"epoch": 54.28,
"learning_rate": 1.3263157894736842e-06,
"loss": 1.4233,
"step": 19160
},
{
"epoch": 54.31,
"learning_rate": 1.3105263157894737e-06,
"loss": 1.4229,
"step": 19170
},
{
"epoch": 54.33,
"learning_rate": 1.2947368421052632e-06,
"loss": 1.4233,
"step": 19180
},
{
"epoch": 54.36,
"learning_rate": 1.2789473684210527e-06,
"loss": 1.4234,
"step": 19190
},
{
"epoch": 54.39,
"learning_rate": 1.2631578947368422e-06,
"loss": 1.4231,
"step": 19200
},
{
"epoch": 54.42,
"learning_rate": 1.2473684210526317e-06,
"loss": 1.4236,
"step": 19210
},
{
"epoch": 54.45,
"learning_rate": 1.2315789473684212e-06,
"loss": 1.4234,
"step": 19220
},
{
"epoch": 54.47,
"learning_rate": 1.2157894736842105e-06,
"loss": 1.4228,
"step": 19230
},
{
"epoch": 54.5,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.423,
"step": 19240
},
{
"epoch": 54.53,
"learning_rate": 1.1842105263157894e-06,
"loss": 1.4237,
"step": 19250
},
{
"epoch": 54.56,
"learning_rate": 1.168421052631579e-06,
"loss": 1.4231,
"step": 19260
},
{
"epoch": 54.59,
"learning_rate": 1.1526315789473684e-06,
"loss": 1.4231,
"step": 19270
},
{
"epoch": 54.62,
"learning_rate": 1.136842105263158e-06,
"loss": 1.4223,
"step": 19280
},
{
"epoch": 54.64,
"learning_rate": 1.1210526315789474e-06,
"loss": 1.4228,
"step": 19290
},
{
"epoch": 54.67,
"learning_rate": 1.1052631578947367e-06,
"loss": 1.4224,
"step": 19300
},
{
"epoch": 54.7,
"learning_rate": 1.0894736842105264e-06,
"loss": 1.4242,
"step": 19310
},
{
"epoch": 54.73,
"learning_rate": 1.0736842105263157e-06,
"loss": 1.4224,
"step": 19320
},
{
"epoch": 54.76,
"learning_rate": 1.0578947368421054e-06,
"loss": 1.422,
"step": 19330
},
{
"epoch": 54.79,
"learning_rate": 1.0421052631578946e-06,
"loss": 1.4226,
"step": 19340
},
{
"epoch": 54.81,
"learning_rate": 1.0263157894736843e-06,
"loss": 1.4228,
"step": 19350
},
{
"epoch": 54.84,
"learning_rate": 1.0105263157894736e-06,
"loss": 1.4228,
"step": 19360
},
{
"epoch": 54.87,
"learning_rate": 9.947368421052633e-07,
"loss": 1.4232,
"step": 19370
},
{
"epoch": 54.9,
"learning_rate": 9.789473684210526e-07,
"loss": 1.4241,
"step": 19380
},
{
"epoch": 54.93,
"learning_rate": 9.63157894736842e-07,
"loss": 1.4232,
"step": 19390
},
{
"epoch": 54.96,
"learning_rate": 9.473684210526316e-07,
"loss": 1.4225,
"step": 19400
},
{
"epoch": 54.98,
"learning_rate": 9.315789473684211e-07,
"loss": 1.4237,
"step": 19410
},
{
"epoch": 55.01,
"learning_rate": 9.157894736842105e-07,
"loss": 1.5472,
"step": 19420
},
{
"epoch": 55.04,
"learning_rate": 9e-07,
"loss": 1.4235,
"step": 19430
},
{
"epoch": 55.07,
"learning_rate": 8.842105263157895e-07,
"loss": 1.422,
"step": 19440
},
{
"epoch": 55.1,
"learning_rate": 8.684210526315789e-07,
"loss": 1.4247,
"step": 19450
},
{
"epoch": 55.13,
"learning_rate": 8.526315789473684e-07,
"loss": 1.4235,
"step": 19460
},
{
"epoch": 55.16,
"learning_rate": 8.368421052631579e-07,
"loss": 1.4226,
"step": 19470
},
{
"epoch": 55.18,
"learning_rate": 8.210526315789474e-07,
"loss": 1.4232,
"step": 19480
},
{
"epoch": 55.21,
"learning_rate": 8.052631578947369e-07,
"loss": 1.4245,
"step": 19490
},
{
"epoch": 55.24,
"learning_rate": 7.894736842105263e-07,
"loss": 1.4234,
"step": 19500
},
{
"epoch": 55.27,
"learning_rate": 7.736842105263158e-07,
"loss": 1.4234,
"step": 19510
},
{
"epoch": 55.3,
"learning_rate": 7.578947368421053e-07,
"loss": 1.4215,
"step": 19520
},
{
"epoch": 55.32,
"learning_rate": 7.421052631578948e-07,
"loss": 1.4312,
"step": 19530
},
{
"epoch": 55.35,
"learning_rate": 7.263157894736843e-07,
"loss": 1.423,
"step": 19540
},
{
"epoch": 55.38,
"learning_rate": 7.105263157894737e-07,
"loss": 1.4222,
"step": 19550
},
{
"epoch": 55.41,
"learning_rate": 6.947368421052632e-07,
"loss": 1.4234,
"step": 19560
},
{
"epoch": 55.44,
"learning_rate": 6.789473684210527e-07,
"loss": 1.4215,
"step": 19570
},
{
"epoch": 55.47,
"learning_rate": 6.631578947368421e-07,
"loss": 1.4229,
"step": 19580
},
{
"epoch": 55.49,
"learning_rate": 6.473684210526316e-07,
"loss": 1.4231,
"step": 19590
},
{
"epoch": 55.52,
"learning_rate": 6.315789473684211e-07,
"loss": 1.4225,
"step": 19600
},
{
"epoch": 55.55,
"learning_rate": 6.157894736842106e-07,
"loss": 1.4227,
"step": 19610
},
{
"epoch": 55.58,
"learning_rate": 6.000000000000001e-07,
"loss": 1.4216,
"step": 19620
},
{
"epoch": 55.61,
"learning_rate": 5.842105263157895e-07,
"loss": 1.4228,
"step": 19630
},
{
"epoch": 55.64,
"learning_rate": 5.68421052631579e-07,
"loss": 1.4228,
"step": 19640
},
{
"epoch": 55.66,
"learning_rate": 5.526315789473683e-07,
"loss": 1.4227,
"step": 19650
},
{
"epoch": 55.69,
"learning_rate": 5.368421052631578e-07,
"loss": 1.4222,
"step": 19660
},
{
"epoch": 55.72,
"learning_rate": 5.210526315789473e-07,
"loss": 1.4227,
"step": 19670
},
{
"epoch": 55.75,
"learning_rate": 5.052631578947368e-07,
"loss": 1.4226,
"step": 19680
},
{
"epoch": 55.78,
"learning_rate": 4.894736842105263e-07,
"loss": 1.4226,
"step": 19690
},
{
"epoch": 55.81,
"learning_rate": 4.736842105263158e-07,
"loss": 1.4224,
"step": 19700
},
{
"epoch": 55.83,
"learning_rate": 4.5789473684210523e-07,
"loss": 1.4264,
"step": 19710
},
{
"epoch": 55.86,
"learning_rate": 4.421052631578947e-07,
"loss": 1.4237,
"step": 19720
},
{
"epoch": 55.89,
"learning_rate": 4.263157894736842e-07,
"loss": 1.4225,
"step": 19730
},
{
"epoch": 55.92,
"learning_rate": 4.105263157894737e-07,
"loss": 1.4229,
"step": 19740
},
{
"epoch": 55.95,
"learning_rate": 3.9473684210526315e-07,
"loss": 1.4229,
"step": 19750
},
{
"epoch": 55.97,
"learning_rate": 3.7894736842105264e-07,
"loss": 1.4237,
"step": 19760
},
{
"epoch": 56.01,
"learning_rate": 3.6315789473684213e-07,
"loss": 1.5472,
"step": 19770
},
{
"epoch": 56.03,
"learning_rate": 3.473684210526316e-07,
"loss": 1.4223,
"step": 19780
},
{
"epoch": 56.06,
"learning_rate": 3.3157894736842106e-07,
"loss": 1.4225,
"step": 19790
},
{
"epoch": 56.09,
"learning_rate": 3.1578947368421055e-07,
"loss": 1.4267,
"step": 19800
},
{
"epoch": 56.12,
"learning_rate": 3.0000000000000004e-07,
"loss": 1.423,
"step": 19810
},
{
"epoch": 56.15,
"learning_rate": 2.842105263157895e-07,
"loss": 1.4227,
"step": 19820
},
{
"epoch": 56.18,
"learning_rate": 2.684210526315789e-07,
"loss": 1.4228,
"step": 19830
},
{
"epoch": 56.2,
"learning_rate": 2.526315789473684e-07,
"loss": 1.4227,
"step": 19840
},
{
"epoch": 56.23,
"learning_rate": 2.368421052631579e-07,
"loss": 1.422,
"step": 19850
},
{
"epoch": 56.26,
"learning_rate": 2.2105263157894736e-07,
"loss": 1.4234,
"step": 19860
},
{
"epoch": 56.29,
"learning_rate": 2.0526315789473685e-07,
"loss": 1.4222,
"step": 19870
},
{
"epoch": 56.32,
"learning_rate": 1.8947368421052632e-07,
"loss": 1.4227,
"step": 19880
},
{
"epoch": 56.34,
"learning_rate": 1.736842105263158e-07,
"loss": 1.4303,
"step": 19890
},
{
"epoch": 56.37,
"learning_rate": 1.5789473684210527e-07,
"loss": 1.4238,
"step": 19900
},
{
"epoch": 56.4,
"learning_rate": 1.4210526315789474e-07,
"loss": 1.4224,
"step": 19910
},
{
"epoch": 56.43,
"learning_rate": 1.263157894736842e-07,
"loss": 1.4222,
"step": 19920
},
{
"epoch": 56.46,
"learning_rate": 1.1052631578947368e-07,
"loss": 1.4226,
"step": 19930
},
{
"epoch": 56.49,
"learning_rate": 9.473684210526316e-08,
"loss": 1.4233,
"step": 19940
},
{
"epoch": 56.51,
"learning_rate": 7.894736842105264e-08,
"loss": 1.4236,
"step": 19950
},
{
"epoch": 56.54,
"learning_rate": 6.31578947368421e-08,
"loss": 1.4224,
"step": 19960
},
{
"epoch": 56.57,
"learning_rate": 4.736842105263158e-08,
"loss": 1.4224,
"step": 19970
},
{
"epoch": 56.6,
"learning_rate": 3.157894736842105e-08,
"loss": 1.4238,
"step": 19980
},
{
"epoch": 56.63,
"learning_rate": 1.5789473684210525e-08,
"loss": 1.4241,
"step": 19990
},
{
"epoch": 56.66,
"learning_rate": 0.0,
"loss": 1.4226,
"step": 20000
},
{
"epoch": 56.66,
"eval_denotation_accuracy": 0.5793006004945249,
"eval_loss": 2.197631359100342,
"eval_runtime": 322.9471,
"eval_samples_per_second": 8.766,
"eval_steps_per_second": 2.192,
"step": 20000
},
{
"epoch": 56.66,
"step": 20000,
"total_flos": 1.1925130485460255e+18,
"train_loss": 1.547329451227188,
"train_runtime": 59809.286,
"train_samples_per_second": 10.701,
"train_steps_per_second": 0.334
}
],
"max_steps": 20000,
"num_train_epochs": 57,
"total_flos": 1.1925130485460255e+18,
"trial_name": null,
"trial_params": null
}