error577 commited on
Commit
36aeb44
·
verified ·
1 Parent(s): 19c172f

Training in progress, step 1674, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19861dcf8a6a8f8201539e1292448ff8fde1d95562739a22a4e9cb4605bdd467
3
  size 100690184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819b69318860d5951783bfc528a2084dfcda651d1448098a9385f110007dc9fb
3
  size 100690184
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcb350e88869cd3711e1fe790137e2796593515caf7899b21bb1daf01f030571
3
  size 201491258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8dbad136c676282d7efd0afd0702cbcde32f375648a43b4e368c5a9bde5bcda
3
  size 201491258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9abd993835e132a028c4f2b8840728fbf0491d982783798ace8f45074d8789e5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1acc65d1d3866a00728b9081b04cc25d66babddcb236f1ba29bec083e40a3de0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b8bd8f62612ef7e1c9aaca9278fcaf68baaf66d9c89cc173c01f8a58701a2f1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce38738d196548234ebf53011cfaf5415f3264a1b3a4e20437d972670cfd877b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.9557929039001465,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1600",
4
- "epoch": 2.865137101287073,
5
  "eval_steps": 200,
6
- "global_step": 1600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11279,6 +11279,524 @@
11279
  "eval_samples_per_second": 23.388,
11280
  "eval_steps_per_second": 5.847,
11281
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11282
  }
11283
  ],
11284
  "logging_steps": 1,
@@ -11302,12 +11820,12 @@
11302
  "should_evaluate": false,
11303
  "should_log": false,
11304
  "should_save": true,
11305
- "should_training_stop": false
11306
  },
11307
  "attributes": {}
11308
  }
11309
  },
11310
- "total_flos": 2.4278396638435738e+17,
11311
  "train_batch_size": 4,
11312
  "trial_name": null,
11313
  "trial_params": null
 
1
  {
2
  "best_metric": 2.9557929039001465,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1600",
4
+ "epoch": 2.9976496922216005,
5
  "eval_steps": 200,
6
+ "global_step": 1674,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11279
  "eval_samples_per_second": 23.388,
11280
  "eval_steps_per_second": 5.847,
11281
  "step": 1600
11282
+ },
11283
+ {
11284
+ "epoch": 2.8669278119753776,
11285
+ "grad_norm": 10.28209114074707,
11286
+ "learning_rate": 9.482465384252792e-07,
11287
+ "loss": 44.263,
11288
+ "step": 1601
11289
+ },
11290
+ {
11291
+ "epoch": 2.868718522663682,
11292
+ "grad_norm": 10.77933120727539,
11293
+ "learning_rate": 9.224848654469931e-07,
11294
+ "loss": 45.5788,
11295
+ "step": 1602
11296
+ },
11297
+ {
11298
+ "epoch": 2.870509233351987,
11299
+ "grad_norm": 10.603645324707031,
11300
+ "learning_rate": 8.970763496797152e-07,
11301
+ "loss": 46.1126,
11302
+ "step": 1603
11303
+ },
11304
+ {
11305
+ "epoch": 2.872299944040291,
11306
+ "grad_norm": 10.797646522521973,
11307
+ "learning_rate": 8.720210816909435e-07,
11308
+ "loss": 45.0403,
11309
+ "step": 1604
11310
+ },
11311
+ {
11312
+ "epoch": 2.8740906547285956,
11313
+ "grad_norm": 11.36728572845459,
11314
+ "learning_rate": 8.473191507889944e-07,
11315
+ "loss": 46.0607,
11316
+ "step": 1605
11317
+ },
11318
+ {
11319
+ "epoch": 2.8758813654169,
11320
+ "grad_norm": 11.209030151367188,
11321
+ "learning_rate": 8.229706450227803e-07,
11322
+ "loss": 45.0385,
11323
+ "step": 1606
11324
+ },
11325
+ {
11326
+ "epoch": 2.8776720761052044,
11327
+ "grad_norm": 12.537467956542969,
11328
+ "learning_rate": 7.989756511814106e-07,
11329
+ "loss": 46.5373,
11330
+ "step": 1607
11331
+ },
11332
+ {
11333
+ "epoch": 2.8794627867935088,
11334
+ "grad_norm": 12.144121170043945,
11335
+ "learning_rate": 7.753342547939357e-07,
11336
+ "loss": 47.1403,
11337
+ "step": 1608
11338
+ },
11339
+ {
11340
+ "epoch": 2.881253497481813,
11341
+ "grad_norm": 12.328975677490234,
11342
+ "learning_rate": 7.520465401290033e-07,
11343
+ "loss": 46.1814,
11344
+ "step": 1609
11345
+ },
11346
+ {
11347
+ "epoch": 2.8830442081701175,
11348
+ "grad_norm": 12.097262382507324,
11349
+ "learning_rate": 7.291125901946027e-07,
11350
+ "loss": 44.0594,
11351
+ "step": 1610
11352
+ },
11353
+ {
11354
+ "epoch": 2.884834918858422,
11355
+ "grad_norm": 14.521700859069824,
11356
+ "learning_rate": 7.065324867377321e-07,
11357
+ "loss": 46.0011,
11358
+ "step": 1611
11359
+ },
11360
+ {
11361
+ "epoch": 2.8866256295467263,
11362
+ "grad_norm": 15.199280738830566,
11363
+ "learning_rate": 6.843063102441316e-07,
11364
+ "loss": 45.9478,
11365
+ "step": 1612
11366
+ },
11367
+ {
11368
+ "epoch": 2.8884163402350307,
11369
+ "grad_norm": 13.241888046264648,
11370
+ "learning_rate": 6.62434139937973e-07,
11371
+ "loss": 44.1643,
11372
+ "step": 1613
11373
+ },
11374
+ {
11375
+ "epoch": 2.890207050923335,
11376
+ "grad_norm": 14.501771926879883,
11377
+ "learning_rate": 6.409160537815817e-07,
11378
+ "loss": 45.4429,
11379
+ "step": 1614
11380
+ },
11381
+ {
11382
+ "epoch": 2.8919977616116395,
11383
+ "grad_norm": 16.729385375976562,
11384
+ "learning_rate": 6.197521284751929e-07,
11385
+ "loss": 46.4013,
11386
+ "step": 1615
11387
+ },
11388
+ {
11389
+ "epoch": 2.893788472299944,
11390
+ "grad_norm": 19.873050689697266,
11391
+ "learning_rate": 5.989424394566401e-07,
11392
+ "loss": 46.4873,
11393
+ "step": 1616
11394
+ },
11395
+ {
11396
+ "epoch": 2.8955791829882482,
11397
+ "grad_norm": 35.03314971923828,
11398
+ "learning_rate": 5.784870609010895e-07,
11399
+ "loss": 49.8218,
11400
+ "step": 1617
11401
+ },
11402
+ {
11403
+ "epoch": 2.8973698936765526,
11404
+ "grad_norm": 6.845594882965088,
11405
+ "learning_rate": 5.58386065720784e-07,
11406
+ "loss": 49.541,
11407
+ "step": 1618
11408
+ },
11409
+ {
11410
+ "epoch": 2.8991606043648575,
11411
+ "grad_norm": 7.069637298583984,
11412
+ "learning_rate": 5.386395255647992e-07,
11413
+ "loss": 49.1567,
11414
+ "step": 1619
11415
+ },
11416
+ {
11417
+ "epoch": 2.900951315053162,
11418
+ "grad_norm": 7.3913397789001465,
11419
+ "learning_rate": 5.192475108187544e-07,
11420
+ "loss": 50.0064,
11421
+ "step": 1620
11422
+ },
11423
+ {
11424
+ "epoch": 2.9027420257414662,
11425
+ "grad_norm": 8.274276733398438,
11426
+ "learning_rate": 5.002100906045914e-07,
11427
+ "loss": 48.457,
11428
+ "step": 1621
11429
+ },
11430
+ {
11431
+ "epoch": 2.9045327364297706,
11432
+ "grad_norm": 8.689967155456543,
11433
+ "learning_rate": 4.815273327803182e-07,
11434
+ "loss": 47.6567,
11435
+ "step": 1622
11436
+ },
11437
+ {
11438
+ "epoch": 2.906323447118075,
11439
+ "grad_norm": 7.821056365966797,
11440
+ "learning_rate": 4.6319930393974306e-07,
11441
+ "loss": 47.2448,
11442
+ "step": 1623
11443
+ },
11444
+ {
11445
+ "epoch": 2.9081141578063794,
11446
+ "grad_norm": 8.508157730102539,
11447
+ "learning_rate": 4.452260694122856e-07,
11448
+ "loss": 45.2271,
11449
+ "step": 1624
11450
+ },
11451
+ {
11452
+ "epoch": 2.9099048684946838,
11453
+ "grad_norm": 8.10664176940918,
11454
+ "learning_rate": 4.2760769326271044e-07,
11455
+ "loss": 48.1505,
11456
+ "step": 1625
11457
+ },
11458
+ {
11459
+ "epoch": 2.911695579182988,
11460
+ "grad_norm": 8.85130786895752,
11461
+ "learning_rate": 4.103442382909051e-07,
11462
+ "loss": 46.7489,
11463
+ "step": 1626
11464
+ },
11465
+ {
11466
+ "epoch": 2.9134862898712925,
11467
+ "grad_norm": 8.193948745727539,
11468
+ "learning_rate": 3.934357660316468e-07,
11469
+ "loss": 46.7504,
11470
+ "step": 1627
11471
+ },
11472
+ {
11473
+ "epoch": 2.915277000559597,
11474
+ "grad_norm": 7.961793422698975,
11475
+ "learning_rate": 3.7688233675439166e-07,
11476
+ "loss": 46.0781,
11477
+ "step": 1628
11478
+ },
11479
+ {
11480
+ "epoch": 2.9170677112479018,
11481
+ "grad_norm": 8.459404945373535,
11482
+ "learning_rate": 3.60684009463097e-07,
11483
+ "loss": 45.1151,
11484
+ "step": 1629
11485
+ },
11486
+ {
11487
+ "epoch": 2.918858421936206,
11488
+ "grad_norm": 10.196649551391602,
11489
+ "learning_rate": 3.4484084189593257e-07,
11490
+ "loss": 45.5705,
11491
+ "step": 1630
11492
+ },
11493
+ {
11494
+ "epoch": 2.9206491326245105,
11495
+ "grad_norm": 8.266303062438965,
11496
+ "learning_rate": 3.293528905251364e-07,
11497
+ "loss": 44.5015,
11498
+ "step": 1631
11499
+ },
11500
+ {
11501
+ "epoch": 2.922439843312815,
11502
+ "grad_norm": 8.931442260742188,
11503
+ "learning_rate": 3.1422021055679265e-07,
11504
+ "loss": 47.0272,
11505
+ "step": 1632
11506
+ },
11507
+ {
11508
+ "epoch": 2.9242305540011193,
11509
+ "grad_norm": 8.576935768127441,
11510
+ "learning_rate": 2.994428559306539e-07,
11511
+ "loss": 46.0597,
11512
+ "step": 1633
11513
+ },
11514
+ {
11515
+ "epoch": 2.9260212646894237,
11516
+ "grad_norm": 9.150681495666504,
11517
+ "learning_rate": 2.850208793198861e-07,
11518
+ "loss": 46.3989,
11519
+ "step": 1634
11520
+ },
11521
+ {
11522
+ "epoch": 2.927811975377728,
11523
+ "grad_norm": 9.138510704040527,
11524
+ "learning_rate": 2.709543321309793e-07,
11525
+ "loss": 46.3653,
11526
+ "step": 1635
11527
+ },
11528
+ {
11529
+ "epoch": 2.9296026860660325,
11530
+ "grad_norm": 9.75882625579834,
11531
+ "learning_rate": 2.572432645034817e-07,
11532
+ "loss": 46.9505,
11533
+ "step": 1636
11534
+ },
11535
+ {
11536
+ "epoch": 2.931393396754337,
11537
+ "grad_norm": 9.24614429473877,
11538
+ "learning_rate": 2.438877253098548e-07,
11539
+ "loss": 45.7768,
11540
+ "step": 1637
11541
+ },
11542
+ {
11543
+ "epoch": 2.9331841074426412,
11544
+ "grad_norm": 8.936918258666992,
11545
+ "learning_rate": 2.3088776215531848e-07,
11546
+ "loss": 46.8492,
11547
+ "step": 1638
11548
+ },
11549
+ {
11550
+ "epoch": 2.9349748181309456,
11551
+ "grad_norm": 9.657326698303223,
11552
+ "learning_rate": 2.1824342137760633e-07,
11553
+ "loss": 46.1916,
11554
+ "step": 1639
11555
+ },
11556
+ {
11557
+ "epoch": 2.93676552881925,
11558
+ "grad_norm": 9.459945678710938,
11559
+ "learning_rate": 2.0595474804691038e-07,
11560
+ "loss": 45.4075,
11561
+ "step": 1640
11562
+ },
11563
+ {
11564
+ "epoch": 2.9385562395075544,
11565
+ "grad_norm": 9.631020545959473,
11566
+ "learning_rate": 1.940217859656257e-07,
11567
+ "loss": 45.8027,
11568
+ "step": 1641
11569
+ },
11570
+ {
11571
+ "epoch": 2.940346950195859,
11572
+ "grad_norm": 10.288619041442871,
11573
+ "learning_rate": 1.824445776682504e-07,
11574
+ "loss": 45.495,
11575
+ "step": 1642
11576
+ },
11577
+ {
11578
+ "epoch": 2.942137660884163,
11579
+ "grad_norm": 9.248783111572266,
11580
+ "learning_rate": 1.712231644212081e-07,
11581
+ "loss": 45.5225,
11582
+ "step": 1643
11583
+ },
11584
+ {
11585
+ "epoch": 2.9439283715724676,
11586
+ "grad_norm": 9.824495315551758,
11587
+ "learning_rate": 1.6035758622269247e-07,
11588
+ "loss": 47.1098,
11589
+ "step": 1644
11590
+ },
11591
+ {
11592
+ "epoch": 2.9457190822607724,
11593
+ "grad_norm": 9.428007125854492,
11594
+ "learning_rate": 1.4984788180256726e-07,
11595
+ "loss": 47.0757,
11596
+ "step": 1645
11597
+ },
11598
+ {
11599
+ "epoch": 2.947509792949077,
11600
+ "grad_norm": 9.915323257446289,
11601
+ "learning_rate": 1.3969408862217758e-07,
11602
+ "loss": 46.4815,
11603
+ "step": 1646
11604
+ },
11605
+ {
11606
+ "epoch": 2.949300503637381,
11607
+ "grad_norm": 10.301142692565918,
11608
+ "learning_rate": 1.2989624287425007e-07,
11609
+ "loss": 45.5609,
11610
+ "step": 1647
11611
+ },
11612
+ {
11613
+ "epoch": 2.9510912143256856,
11614
+ "grad_norm": 10.241244316101074,
11615
+ "learning_rate": 1.204543794827595e-07,
11616
+ "loss": 46.7959,
11617
+ "step": 1648
11618
+ },
11619
+ {
11620
+ "epoch": 2.95288192501399,
11621
+ "grad_norm": 10.639913558959961,
11622
+ "learning_rate": 1.1136853210278463e-07,
11623
+ "loss": 46.3414,
11624
+ "step": 1649
11625
+ },
11626
+ {
11627
+ "epoch": 2.9546726357022943,
11628
+ "grad_norm": 10.630585670471191,
11629
+ "learning_rate": 1.0263873312040818e-07,
11630
+ "loss": 45.8505,
11631
+ "step": 1650
11632
+ },
11633
+ {
11634
+ "epoch": 2.9564633463905987,
11635
+ "grad_norm": 10.834024429321289,
11636
+ "learning_rate": 9.426501365259465e-08,
11637
+ "loss": 45.8424,
11638
+ "step": 1651
11639
+ },
11640
+ {
11641
+ "epoch": 2.958254057078903,
11642
+ "grad_norm": 11.740537643432617,
11643
+ "learning_rate": 8.624740354707949e-08,
11644
+ "loss": 45.8525,
11645
+ "step": 1652
11646
+ },
11647
+ {
11648
+ "epoch": 2.9600447677672075,
11649
+ "grad_norm": 10.828522682189941,
11650
+ "learning_rate": 7.85859313822579e-08,
11651
+ "loss": 45.7175,
11652
+ "step": 1653
11653
+ },
11654
+ {
11655
+ "epoch": 2.961835478455512,
11656
+ "grad_norm": 11.889662742614746,
11657
+ "learning_rate": 7.128062446709604e-08,
11658
+ "loss": 45.1248,
11659
+ "step": 1654
11660
+ },
11661
+ {
11662
+ "epoch": 2.9636261891438167,
11663
+ "grad_norm": 12.827479362487793,
11664
+ "learning_rate": 6.433150884102012e-08,
11665
+ "loss": 46.8401,
11666
+ "step": 1655
11667
+ },
11668
+ {
11669
+ "epoch": 2.965416899832121,
11670
+ "grad_norm": 11.234233856201172,
11671
+ "learning_rate": 5.773860927383856e-08,
11672
+ "loss": 45.9078,
11673
+ "step": 1656
11674
+ },
11675
+ {
11676
+ "epoch": 2.9672076105204255,
11677
+ "grad_norm": 11.893027305603027,
11678
+ "learning_rate": 5.150194926561991e-08,
11679
+ "loss": 46.3441,
11680
+ "step": 1657
11681
+ },
11682
+ {
11683
+ "epoch": 2.96899832120873,
11684
+ "grad_norm": 12.164173126220703,
11685
+ "learning_rate": 4.562155104665955e-08,
11686
+ "loss": 45.0211,
11687
+ "step": 1658
11688
+ },
11689
+ {
11690
+ "epoch": 2.9707890318970342,
11691
+ "grad_norm": 11.903395652770996,
11692
+ "learning_rate": 4.009743557736867e-08,
11693
+ "loss": 44.686,
11694
+ "step": 1659
11695
+ },
11696
+ {
11697
+ "epoch": 2.9725797425853386,
11698
+ "grad_norm": 12.025436401367188,
11699
+ "learning_rate": 3.492962254819654e-08,
11700
+ "loss": 44.5997,
11701
+ "step": 1660
11702
+ },
11703
+ {
11704
+ "epoch": 2.974370453273643,
11705
+ "grad_norm": 14.756345748901367,
11706
+ "learning_rate": 3.0118130379575005e-08,
11707
+ "loss": 46.8916,
11708
+ "step": 1661
11709
+ },
11710
+ {
11711
+ "epoch": 2.9761611639619474,
11712
+ "grad_norm": 13.509708404541016,
11713
+ "learning_rate": 2.5662976221840773e-08,
11714
+ "loss": 45.3565,
11715
+ "step": 1662
11716
+ },
11717
+ {
11718
+ "epoch": 2.977951874650252,
11719
+ "grad_norm": 13.354639053344727,
11720
+ "learning_rate": 2.1564175955191e-08,
11721
+ "loss": 44.1853,
11722
+ "step": 1663
11723
+ },
11724
+ {
11725
+ "epoch": 2.979742585338556,
11726
+ "grad_norm": 15.299182891845703,
11727
+ "learning_rate": 1.7821744189605582e-08,
11728
+ "loss": 45.265,
11729
+ "step": 1664
11730
+ },
11731
+ {
11732
+ "epoch": 2.9815332960268606,
11733
+ "grad_norm": 17.097658157348633,
11734
+ "learning_rate": 1.4435694264802735e-08,
11735
+ "loss": 47.0729,
11736
+ "step": 1665
11737
+ },
11738
+ {
11739
+ "epoch": 2.983324006715165,
11740
+ "grad_norm": 20.666465759277344,
11741
+ "learning_rate": 1.1406038250205698e-08,
11742
+ "loss": 45.6816,
11743
+ "step": 1666
11744
+ },
11745
+ {
11746
+ "epoch": 2.9851147174034693,
11747
+ "grad_norm": 35.938072204589844,
11748
+ "learning_rate": 8.732786944887217e-09,
11749
+ "loss": 45.4332,
11750
+ "step": 1667
11751
+ },
11752
+ {
11753
+ "epoch": 2.9869054280917737,
11754
+ "grad_norm": 9.02865982055664,
11755
+ "learning_rate": 6.41594987752514e-09,
11756
+ "loss": 48.6141,
11757
+ "step": 1668
11758
+ },
11759
+ {
11760
+ "epoch": 2.988696138780078,
11761
+ "grad_norm": 8.4238862991333,
11762
+ "learning_rate": 4.45553530638021e-09,
11763
+ "loss": 46.6257,
11764
+ "step": 1669
11765
+ },
11766
+ {
11767
+ "epoch": 2.9904868494683825,
11768
+ "grad_norm": 9.113216400146484,
11769
+ "learning_rate": 2.851550219240551e-09,
11770
+ "loss": 47.1215,
11771
+ "step": 1670
11772
+ },
11773
+ {
11774
+ "epoch": 2.9922775601566873,
11775
+ "grad_norm": 9.337307929992676,
11776
+ "learning_rate": 1.6040003334660825e-09,
11777
+ "loss": 46.2172,
11778
+ "step": 1671
11779
+ },
11780
+ {
11781
+ "epoch": 2.9940682708449917,
11782
+ "grad_norm": 10.280569076538086,
11783
+ "learning_rate": 7.128900958774942e-10,
11784
+ "loss": 46.1396,
11785
+ "step": 1672
11786
+ },
11787
+ {
11788
+ "epoch": 2.995858981533296,
11789
+ "grad_norm": 10.593201637268066,
11790
+ "learning_rate": 1.7822268278955278e-10,
11791
+ "loss": 46.0949,
11792
+ "step": 1673
11793
+ },
11794
+ {
11795
+ "epoch": 2.9976496922216005,
11796
+ "grad_norm": 11.723645210266113,
11797
+ "learning_rate": 0.0,
11798
+ "loss": 45.7927,
11799
+ "step": 1674
11800
  }
11801
  ],
11802
  "logging_steps": 1,
 
11820
  "should_evaluate": false,
11821
  "should_log": false,
11822
  "should_save": true,
11823
+ "should_training_stop": true
11824
  },
11825
  "attributes": {}
11826
  }
11827
  },
11828
+ "total_flos": 2.539421652882555e+17,
11829
  "train_batch_size": 4,
11830
  "trial_name": null,
11831
  "trial_params": null