Training in progress, step 1674, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:819b69318860d5951783bfc528a2084dfcda651d1448098a9385f110007dc9fb
 size 100690184
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b8dbad136c676282d7efd0afd0702cbcde32f375648a43b4e368c5a9bde5bcda
 size 201491258
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1acc65d1d3866a00728b9081b04cc25d66babddcb236f1ba29bec083e40a3de0
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ce38738d196548234ebf53011cfaf5415f3264a1b3a4e20437d972670cfd877b
 size 1064
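Each of the four files above is a Git LFS pointer: the repository tracks only the version/oid/size triplet, and this commit swaps in the sha256 of the new checkpoint blob. A minimal sketch (not part of this repo; path and expected values are taken from the adapter_model.safetensors pointer above) of verifying a downloaded file against its pointer:

import hashlib
import os

# Values from the last-checkpoint/adapter_model.safetensors pointer above.
EXPECTED_OID = "819b69318860d5951783bfc528a2084dfcda651d1448098a9385f110007dc9fb"
EXPECTED_SIZE = 100690184

def sha256_of(path: str) -> str:
    """Stream the file in 1 MiB chunks so large checkpoints fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

path = "last-checkpoint/adapter_model.safetensors"  # assumed local download path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch vs. LFS pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"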
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.9557929039001465,
   "best_model_checkpoint": "miner_id_24/checkpoint-1600",
-  "epoch": 2.
+  "epoch": 2.9976496922216005,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 1674,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11279,6 +11279,524 @@
       "eval_samples_per_second": 23.388,
       "eval_steps_per_second": 5.847,
       "step": 1600
+    },
+    {
+      "epoch": 2.8669278119753776,
+      "grad_norm": 10.28209114074707,
+      "learning_rate": 9.482465384252792e-07,
+      "loss": 44.263,
+      "step": 1601
+    },
+    {
+      "epoch": 2.868718522663682,
+      "grad_norm": 10.77933120727539,
+      "learning_rate": 9.224848654469931e-07,
+      "loss": 45.5788,
+      "step": 1602
+    },
+    {
+      "epoch": 2.870509233351987,
+      "grad_norm": 10.603645324707031,
+      "learning_rate": 8.970763496797152e-07,
+      "loss": 46.1126,
+      "step": 1603
+    },
+    {
+      "epoch": 2.872299944040291,
+      "grad_norm": 10.797646522521973,
+      "learning_rate": 8.720210816909435e-07,
+      "loss": 45.0403,
+      "step": 1604
+    },
+    {
+      "epoch": 2.8740906547285956,
+      "grad_norm": 11.36728572845459,
+      "learning_rate": 8.473191507889944e-07,
+      "loss": 46.0607,
+      "step": 1605
+    },
+    {
+      "epoch": 2.8758813654169,
+      "grad_norm": 11.209030151367188,
+      "learning_rate": 8.229706450227803e-07,
+      "loss": 45.0385,
+      "step": 1606
+    },
+    {
+      "epoch": 2.8776720761052044,
+      "grad_norm": 12.537467956542969,
+      "learning_rate": 7.989756511814106e-07,
+      "loss": 46.5373,
+      "step": 1607
+    },
+    {
+      "epoch": 2.8794627867935088,
+      "grad_norm": 12.144121170043945,
+      "learning_rate": 7.753342547939357e-07,
+      "loss": 47.1403,
+      "step": 1608
+    },
+    {
+      "epoch": 2.881253497481813,
+      "grad_norm": 12.328975677490234,
+      "learning_rate": 7.520465401290033e-07,
+      "loss": 46.1814,
+      "step": 1609
+    },
+    {
+      "epoch": 2.8830442081701175,
+      "grad_norm": 12.097262382507324,
+      "learning_rate": 7.291125901946027e-07,
+      "loss": 44.0594,
+      "step": 1610
+    },
+    {
+      "epoch": 2.884834918858422,
+      "grad_norm": 14.521700859069824,
+      "learning_rate": 7.065324867377321e-07,
+      "loss": 46.0011,
+      "step": 1611
+    },
+    {
+      "epoch": 2.8866256295467263,
+      "grad_norm": 15.199280738830566,
+      "learning_rate": 6.843063102441316e-07,
+      "loss": 45.9478,
+      "step": 1612
+    },
+    {
+      "epoch": 2.8884163402350307,
+      "grad_norm": 13.241888046264648,
+      "learning_rate": 6.62434139937973e-07,
+      "loss": 44.1643,
+      "step": 1613
+    },
+    {
+      "epoch": 2.890207050923335,
+      "grad_norm": 14.501771926879883,
+      "learning_rate": 6.409160537815817e-07,
+      "loss": 45.4429,
+      "step": 1614
+    },
+    {
+      "epoch": 2.8919977616116395,
+      "grad_norm": 16.729385375976562,
+      "learning_rate": 6.197521284751929e-07,
+      "loss": 46.4013,
+      "step": 1615
+    },
+    {
+      "epoch": 2.893788472299944,
+      "grad_norm": 19.873050689697266,
+      "learning_rate": 5.989424394566401e-07,
+      "loss": 46.4873,
+      "step": 1616
+    },
+    {
+      "epoch": 2.8955791829882482,
+      "grad_norm": 35.03314971923828,
+      "learning_rate": 5.784870609010895e-07,
+      "loss": 49.8218,
+      "step": 1617
+    },
+    {
+      "epoch": 2.8973698936765526,
+      "grad_norm": 6.845594882965088,
+      "learning_rate": 5.58386065720784e-07,
+      "loss": 49.541,
+      "step": 1618
+    },
+    {
+      "epoch": 2.8991606043648575,
+      "grad_norm": 7.069637298583984,
+      "learning_rate": 5.386395255647992e-07,
+      "loss": 49.1567,
+      "step": 1619
+    },
+    {
+      "epoch": 2.900951315053162,
+      "grad_norm": 7.3913397789001465,
+      "learning_rate": 5.192475108187544e-07,
+      "loss": 50.0064,
+      "step": 1620
+    },
+    {
+      "epoch": 2.9027420257414662,
+      "grad_norm": 8.274276733398438,
+      "learning_rate": 5.002100906045914e-07,
+      "loss": 48.457,
+      "step": 1621
+    },
+    {
+      "epoch": 2.9045327364297706,
+      "grad_norm": 8.689967155456543,
+      "learning_rate": 4.815273327803182e-07,
+      "loss": 47.6567,
+      "step": 1622
+    },
+    {
+      "epoch": 2.906323447118075,
+      "grad_norm": 7.821056365966797,
+      "learning_rate": 4.6319930393974306e-07,
+      "loss": 47.2448,
+      "step": 1623
+    },
+    {
+      "epoch": 2.9081141578063794,
+      "grad_norm": 8.508157730102539,
+      "learning_rate": 4.452260694122856e-07,
+      "loss": 45.2271,
+      "step": 1624
+    },
+    {
+      "epoch": 2.9099048684946838,
+      "grad_norm": 8.10664176940918,
+      "learning_rate": 4.2760769326271044e-07,
+      "loss": 48.1505,
+      "step": 1625
+    },
+    {
+      "epoch": 2.911695579182988,
+      "grad_norm": 8.85130786895752,
+      "learning_rate": 4.103442382909051e-07,
+      "loss": 46.7489,
+      "step": 1626
+    },
+    {
+      "epoch": 2.9134862898712925,
+      "grad_norm": 8.193948745727539,
+      "learning_rate": 3.934357660316468e-07,
+      "loss": 46.7504,
+      "step": 1627
+    },
+    {
+      "epoch": 2.915277000559597,
+      "grad_norm": 7.961793422698975,
+      "learning_rate": 3.7688233675439166e-07,
+      "loss": 46.0781,
+      "step": 1628
+    },
+    {
+      "epoch": 2.9170677112479018,
+      "grad_norm": 8.459404945373535,
+      "learning_rate": 3.60684009463097e-07,
+      "loss": 45.1151,
+      "step": 1629
+    },
+    {
+      "epoch": 2.918858421936206,
+      "grad_norm": 10.196649551391602,
+      "learning_rate": 3.4484084189593257e-07,
+      "loss": 45.5705,
+      "step": 1630
+    },
+    {
+      "epoch": 2.9206491326245105,
+      "grad_norm": 8.266303062438965,
+      "learning_rate": 3.293528905251364e-07,
+      "loss": 44.5015,
+      "step": 1631
+    },
+    {
+      "epoch": 2.922439843312815,
+      "grad_norm": 8.931442260742188,
+      "learning_rate": 3.1422021055679265e-07,
+      "loss": 47.0272,
+      "step": 1632
+    },
+    {
+      "epoch": 2.9242305540011193,
+      "grad_norm": 8.576935768127441,
+      "learning_rate": 2.994428559306539e-07,
+      "loss": 46.0597,
+      "step": 1633
+    },
+    {
+      "epoch": 2.9260212646894237,
+      "grad_norm": 9.150681495666504,
+      "learning_rate": 2.850208793198861e-07,
+      "loss": 46.3989,
+      "step": 1634
+    },
+    {
+      "epoch": 2.927811975377728,
+      "grad_norm": 9.138510704040527,
+      "learning_rate": 2.709543321309793e-07,
+      "loss": 46.3653,
+      "step": 1635
+    },
+    {
+      "epoch": 2.9296026860660325,
+      "grad_norm": 9.75882625579834,
+      "learning_rate": 2.572432645034817e-07,
+      "loss": 46.9505,
+      "step": 1636
+    },
+    {
+      "epoch": 2.931393396754337,
+      "grad_norm": 9.24614429473877,
+      "learning_rate": 2.438877253098548e-07,
+      "loss": 45.7768,
+      "step": 1637
+    },
+    {
+      "epoch": 2.9331841074426412,
+      "grad_norm": 8.936918258666992,
+      "learning_rate": 2.3088776215531848e-07,
+      "loss": 46.8492,
+      "step": 1638
+    },
+    {
+      "epoch": 2.9349748181309456,
+      "grad_norm": 9.657326698303223,
+      "learning_rate": 2.1824342137760633e-07,
+      "loss": 46.1916,
+      "step": 1639
+    },
+    {
+      "epoch": 2.93676552881925,
+      "grad_norm": 9.459945678710938,
+      "learning_rate": 2.0595474804691038e-07,
+      "loss": 45.4075,
+      "step": 1640
+    },
+    {
+      "epoch": 2.9385562395075544,
+      "grad_norm": 9.631020545959473,
+      "learning_rate": 1.940217859656257e-07,
+      "loss": 45.8027,
+      "step": 1641
+    },
+    {
+      "epoch": 2.940346950195859,
+      "grad_norm": 10.288619041442871,
+      "learning_rate": 1.824445776682504e-07,
+      "loss": 45.495,
+      "step": 1642
+    },
+    {
+      "epoch": 2.942137660884163,
+      "grad_norm": 9.248783111572266,
+      "learning_rate": 1.712231644212081e-07,
+      "loss": 45.5225,
+      "step": 1643
+    },
+    {
+      "epoch": 2.9439283715724676,
+      "grad_norm": 9.824495315551758,
+      "learning_rate": 1.6035758622269247e-07,
+      "loss": 47.1098,
+      "step": 1644
+    },
+    {
+      "epoch": 2.9457190822607724,
+      "grad_norm": 9.428007125854492,
+      "learning_rate": 1.4984788180256726e-07,
+      "loss": 47.0757,
+      "step": 1645
+    },
+    {
+      "epoch": 2.947509792949077,
+      "grad_norm": 9.915323257446289,
+      "learning_rate": 1.3969408862217758e-07,
+      "loss": 46.4815,
+      "step": 1646
+    },
+    {
+      "epoch": 2.949300503637381,
+      "grad_norm": 10.301142692565918,
+      "learning_rate": 1.2989624287425007e-07,
+      "loss": 45.5609,
+      "step": 1647
+    },
+    {
+      "epoch": 2.9510912143256856,
+      "grad_norm": 10.241244316101074,
+      "learning_rate": 1.204543794827595e-07,
+      "loss": 46.7959,
+      "step": 1648
+    },
+    {
+      "epoch": 2.95288192501399,
+      "grad_norm": 10.639913558959961,
+      "learning_rate": 1.1136853210278463e-07,
+      "loss": 46.3414,
+      "step": 1649
+    },
+    {
+      "epoch": 2.9546726357022943,
+      "grad_norm": 10.630585670471191,
+      "learning_rate": 1.0263873312040818e-07,
+      "loss": 45.8505,
+      "step": 1650
+    },
+    {
+      "epoch": 2.9564633463905987,
+      "grad_norm": 10.834024429321289,
+      "learning_rate": 9.426501365259465e-08,
+      "loss": 45.8424,
+      "step": 1651
+    },
+    {
+      "epoch": 2.958254057078903,
+      "grad_norm": 11.740537643432617,
+      "learning_rate": 8.624740354707949e-08,
+      "loss": 45.8525,
+      "step": 1652
+    },
+    {
+      "epoch": 2.9600447677672075,
+      "grad_norm": 10.828522682189941,
+      "learning_rate": 7.85859313822579e-08,
+      "loss": 45.7175,
+      "step": 1653
+    },
+    {
+      "epoch": 2.961835478455512,
+      "grad_norm": 11.889662742614746,
+      "learning_rate": 7.128062446709604e-08,
+      "loss": 45.1248,
+      "step": 1654
+    },
+    {
+      "epoch": 2.9636261891438167,
+      "grad_norm": 12.827479362487793,
+      "learning_rate": 6.433150884102012e-08,
+      "loss": 46.8401,
+      "step": 1655
+    },
+    {
+      "epoch": 2.965416899832121,
+      "grad_norm": 11.234233856201172,
+      "learning_rate": 5.773860927383856e-08,
+      "loss": 45.9078,
+      "step": 1656
+    },
+    {
+      "epoch": 2.9672076105204255,
+      "grad_norm": 11.893027305603027,
+      "learning_rate": 5.150194926561991e-08,
+      "loss": 46.3441,
+      "step": 1657
+    },
+    {
+      "epoch": 2.96899832120873,
+      "grad_norm": 12.164173126220703,
+      "learning_rate": 4.562155104665955e-08,
+      "loss": 45.0211,
+      "step": 1658
+    },
+    {
+      "epoch": 2.9707890318970342,
+      "grad_norm": 11.903395652770996,
+      "learning_rate": 4.009743557736867e-08,
+      "loss": 44.686,
+      "step": 1659
+    },
+    {
+      "epoch": 2.9725797425853386,
+      "grad_norm": 12.025436401367188,
+      "learning_rate": 3.492962254819654e-08,
+      "loss": 44.5997,
+      "step": 1660
+    },
+    {
+      "epoch": 2.974370453273643,
+      "grad_norm": 14.756345748901367,
+      "learning_rate": 3.0118130379575005e-08,
+      "loss": 46.8916,
+      "step": 1661
+    },
+    {
+      "epoch": 2.9761611639619474,
+      "grad_norm": 13.509708404541016,
+      "learning_rate": 2.5662976221840773e-08,
+      "loss": 45.3565,
+      "step": 1662
+    },
+    {
+      "epoch": 2.977951874650252,
+      "grad_norm": 13.354639053344727,
+      "learning_rate": 2.1564175955191e-08,
+      "loss": 44.1853,
+      "step": 1663
+    },
+    {
+      "epoch": 2.979742585338556,
+      "grad_norm": 15.299182891845703,
+      "learning_rate": 1.7821744189605582e-08,
+      "loss": 45.265,
+      "step": 1664
+    },
+    {
+      "epoch": 2.9815332960268606,
+      "grad_norm": 17.097658157348633,
+      "learning_rate": 1.4435694264802735e-08,
+      "loss": 47.0729,
+      "step": 1665
+    },
+    {
+      "epoch": 2.983324006715165,
+      "grad_norm": 20.666465759277344,
+      "learning_rate": 1.1406038250205698e-08,
+      "loss": 45.6816,
+      "step": 1666
+    },
+    {
+      "epoch": 2.9851147174034693,
+      "grad_norm": 35.938072204589844,
+      "learning_rate": 8.732786944887217e-09,
+      "loss": 45.4332,
+      "step": 1667
+    },
+    {
+      "epoch": 2.9869054280917737,
+      "grad_norm": 9.02865982055664,
+      "learning_rate": 6.41594987752514e-09,
+      "loss": 48.6141,
+      "step": 1668
+    },
+    {
+      "epoch": 2.988696138780078,
+      "grad_norm": 8.4238862991333,
+      "learning_rate": 4.45553530638021e-09,
+      "loss": 46.6257,
+      "step": 1669
+    },
+    {
+      "epoch": 2.9904868494683825,
+      "grad_norm": 9.113216400146484,
+      "learning_rate": 2.851550219240551e-09,
+      "loss": 47.1215,
+      "step": 1670
+    },
+    {
+      "epoch": 2.9922775601566873,
+      "grad_norm": 9.337307929992676,
+      "learning_rate": 1.6040003334660825e-09,
+      "loss": 46.2172,
+      "step": 1671
+    },
+    {
+      "epoch": 2.9940682708449917,
+      "grad_norm": 10.280569076538086,
+      "learning_rate": 7.128900958774942e-10,
+      "loss": 46.1396,
+      "step": 1672
+    },
+    {
+      "epoch": 2.995858981533296,
+      "grad_norm": 10.593201637268066,
+      "learning_rate": 1.7822268278955278e-10,
+      "loss": 46.0949,
+      "step": 1673
+    },
+    {
+      "epoch": 2.9976496922216005,
+      "grad_norm": 11.723645210266113,
+      "learning_rate": 0.0,
+      "loss": 45.7927,
+      "step": 1674
     }
   ],
   "logging_steps": 1,
@@ -11302,12 +11820,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.539421652882555e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
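For reference, a minimal sketch (assuming the standard transformers trainer_state.json layout visible in the diff above) of loading the updated state and summarizing the log entries this commit appends for steps 1601-1674:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Top-level fields changed by this commit.
print(state["global_step"], state["epoch"], state["best_metric"])

# Training-log entries appended to "log_history" (eval entries lack "loss").
new = [e for e in state["log_history"] if e.get("step", 0) > 1600 and "loss" in e]
for e in new[-3:]:
    print(e["step"], e["loss"], e["learning_rate"], e["grad_norm"])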