Training in progress, step 1674, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:819b69318860d5951783bfc528a2084dfcda651d1448098a9385f110007dc9fb
 size 100690184
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b8dbad136c676282d7efd0afd0702cbcde32f375648a43b4e368c5a9bde5bcda
 size 201491258
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1acc65d1d3866a00728b9081b04cc25d66babddcb236f1ba29bec083e40a3de0
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ce38738d196548234ebf53011cfaf5415f3264a1b3a4e20437d972670cfd877b
 size 1064
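Each of the four files above is a Git LFS pointer: the repository tracks only the version/oid/size triplet, and this commit swaps in the sha256 of the new checkpoint blob. A minimal sketch (not part of this repo; path and expected values are taken from the adapter_model.safetensors pointer above) of verifying a downloaded file against its pointer:

import hashlib
import os

# Values from the last-checkpoint/adapter_model.safetensors pointer above.
EXPECTED_OID = "819b69318860d5951783bfc528a2084dfcda651d1448098a9385f110007dc9fb"
EXPECTED_SIZE = 100690184

def sha256_of(path: str) -> str:
    """Stream the file in 1 MiB chunks so large checkpoints fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

path = "last-checkpoint/adapter_model.safetensors"  # assumed local download path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch vs. LFS pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"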
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.9557929039001465,
   "best_model_checkpoint": "miner_id_24/checkpoint-1600",
-  "epoch": 2.
+  "epoch": 2.9976496922216005,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 1674,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11279,6 +11279,524 @@
       "eval_samples_per_second": 23.388,
       "eval_steps_per_second": 5.847,
       "step": 1600
+    },
+    {
+      "epoch": 2.8669278119753776,
+      "grad_norm": 10.28209114074707,
+      "learning_rate": 9.482465384252792e-07,
+      "loss": 44.263,
+      "step": 1601
+    },
+    {
+      "epoch": 2.868718522663682,
+      "grad_norm": 10.77933120727539,
+      "learning_rate": 9.224848654469931e-07,
+      "loss": 45.5788,
+      "step": 1602
+    },
+    {
+      "epoch": 2.870509233351987,
+      "grad_norm": 10.603645324707031,
+      "learning_rate": 8.970763496797152e-07,
+      "loss": 46.1126,
+      "step": 1603
+    },
+    {
+      "epoch": 2.872299944040291,
+      "grad_norm": 10.797646522521973,
+      "learning_rate": 8.720210816909435e-07,
+      "loss": 45.0403,
+      "step": 1604
+    },
+    {
+      "epoch": 2.8740906547285956,
+      "grad_norm": 11.36728572845459,
+      "learning_rate": 8.473191507889944e-07,
+      "loss": 46.0607,
+      "step": 1605
+    },
+    {
+      "epoch": 2.8758813654169,
+      "grad_norm": 11.209030151367188,
+      "learning_rate": 8.229706450227803e-07,
+      "loss": 45.0385,
+      "step": 1606
+    },
+    {
+      "epoch": 2.8776720761052044,
+      "grad_norm": 12.537467956542969,
+      "learning_rate": 7.989756511814106e-07,
+      "loss": 46.5373,
+      "step": 1607
+    },
+    {
+      "epoch": 2.8794627867935088,
+      "grad_norm": 12.144121170043945,
+      "learning_rate": 7.753342547939357e-07,
+      "loss": 47.1403,
+      "step": 1608
+    },
+    {
+      "epoch": 2.881253497481813,
+      "grad_norm": 12.328975677490234,
+      "learning_rate": 7.520465401290033e-07,
+      "loss": 46.1814,
+      "step": 1609
+    },
+    {
+      "epoch": 2.8830442081701175,
+      "grad_norm": 12.097262382507324,
+      "learning_rate": 7.291125901946027e-07,
+      "loss": 44.0594,
+      "step": 1610
+    },
+    {
+      "epoch": 2.884834918858422,
+      "grad_norm": 14.521700859069824,
+      "learning_rate": 7.065324867377321e-07,
+      "loss": 46.0011,
+      "step": 1611
+    },
+    {
+      "epoch": 2.8866256295467263,
+      "grad_norm": 15.199280738830566,
+      "learning_rate": 6.843063102441316e-07,
+      "loss": 45.9478,
+      "step": 1612
+    },
+    {
+      "epoch": 2.8884163402350307,
+      "grad_norm": 13.241888046264648,
+      "learning_rate": 6.62434139937973e-07,
+      "loss": 44.1643,
+      "step": 1613
+    },
+    {
+      "epoch": 2.890207050923335,
+      "grad_norm": 14.501771926879883,
+      "learning_rate": 6.409160537815817e-07,
+      "loss": 45.4429,
+      "step": 1614
+    },
+    {
+      "epoch": 2.8919977616116395,
+      "grad_norm": 16.729385375976562,
+      "learning_rate": 6.197521284751929e-07,
+      "loss": 46.4013,
+      "step": 1615
+    },
+    {
+      "epoch": 2.893788472299944,
+      "grad_norm": 19.873050689697266,
+      "learning_rate": 5.989424394566401e-07,
+      "loss": 46.4873,
+      "step": 1616
+    },
+    {
+      "epoch": 2.8955791829882482,
+      "grad_norm": 35.03314971923828,
+      "learning_rate": 5.784870609010895e-07,
+      "loss": 49.8218,
+      "step": 1617
+    },
+    {
+      "epoch": 2.8973698936765526,
+      "grad_norm": 6.845594882965088,
+      "learning_rate": 5.58386065720784e-07,
+      "loss": 49.541,
+      "step": 1618
+    },
+    {
+      "epoch": 2.8991606043648575,
+      "grad_norm": 7.069637298583984,
+      "learning_rate": 5.386395255647992e-07,
+      "loss": 49.1567,
+      "step": 1619
+    },
+    {
+      "epoch": 2.900951315053162,
+      "grad_norm": 7.3913397789001465,
+      "learning_rate": 5.192475108187544e-07,
+      "loss": 50.0064,
+      "step": 1620
+    },
+    {
+      "epoch": 2.9027420257414662,
+      "grad_norm": 8.274276733398438,
+      "learning_rate": 5.002100906045914e-07,
+      "loss": 48.457,
+      "step": 1621
+    },
+    {
+      "epoch": 2.9045327364297706,
+      "grad_norm": 8.689967155456543,
+      "learning_rate": 4.815273327803182e-07,
+      "loss": 47.6567,
+      "step": 1622
+    },
+    {
+      "epoch": 2.906323447118075,
+      "grad_norm": 7.821056365966797,
+      "learning_rate": 4.6319930393974306e-07,
+      "loss": 47.2448,
+      "step": 1623
+    },
+    {
+      "epoch": 2.9081141578063794,
+      "grad_norm": 8.508157730102539,
+      "learning_rate": 4.452260694122856e-07,
+      "loss": 45.2271,
+      "step": 1624
+    },
+    {
+      "epoch": 2.9099048684946838,
+      "grad_norm": 8.10664176940918,
+      "learning_rate": 4.2760769326271044e-07,
+      "loss": 48.1505,
+      "step": 1625
+    },
+    {
+      "epoch": 2.911695579182988,
+      "grad_norm": 8.85130786895752,
+      "learning_rate": 4.103442382909051e-07,
+      "loss": 46.7489,
+      "step": 1626
+    },
+    {
+      "epoch": 2.9134862898712925,
+      "grad_norm": 8.193948745727539,
+      "learning_rate": 3.934357660316468e-07,
+      "loss": 46.7504,
+      "step": 1627
+    },
+    {
+      "epoch": 2.915277000559597,
+      "grad_norm": 7.961793422698975,
+      "learning_rate": 3.7688233675439166e-07,
+      "loss": 46.0781,
+      "step": 1628
+    },
+    {
+      "epoch": 2.9170677112479018,
+      "grad_norm": 8.459404945373535,
+      "learning_rate": 3.60684009463097e-07,
+      "loss": 45.1151,
+      "step": 1629
+    },
+    {
+      "epoch": 2.918858421936206,
+      "grad_norm": 10.196649551391602,
+      "learning_rate": 3.4484084189593257e-07,
+      "loss": 45.5705,
+      "step": 1630
+    },
+    {
+      "epoch": 2.9206491326245105,
+      "grad_norm": 8.266303062438965,
+      "learning_rate": 3.293528905251364e-07,
+      "loss": 44.5015,
+      "step": 1631
+    },
+    {
+      "epoch": 2.922439843312815,
+      "grad_norm": 8.931442260742188,
+      "learning_rate": 3.1422021055679265e-07,
+      "loss": 47.0272,
+      "step": 1632
+    },
+    {
+      "epoch": 2.9242305540011193,
+      "grad_norm": 8.576935768127441,
+      "learning_rate": 2.994428559306539e-07,
+      "loss": 46.0597,
+      "step": 1633
+    },
+    {
+      "epoch": 2.9260212646894237,
+      "grad_norm": 9.150681495666504,
+      "learning_rate": 2.850208793198861e-07,
+      "loss": 46.3989,
+      "step": 1634
+    },
+    {
+      "epoch": 2.927811975377728,
+      "grad_norm": 9.138510704040527,
+      "learning_rate": 2.709543321309793e-07,
+      "loss": 46.3653,
+      "step": 1635
+    },
+    {
+      "epoch": 2.9296026860660325,
+      "grad_norm": 9.75882625579834,
+      "learning_rate": 2.572432645034817e-07,
+      "loss": 46.9505,
+      "step": 1636
+    },
+    {
+      "epoch": 2.931393396754337,
+      "grad_norm": 9.24614429473877,
+      "learning_rate": 2.438877253098548e-07,
+      "loss": 45.7768,
+      "step": 1637
+    },
+    {
+      "epoch": 2.9331841074426412,
+      "grad_norm": 8.936918258666992,
+      "learning_rate": 2.3088776215531848e-07,
+      "loss": 46.8492,
+      "step": 1638
+    },
+    {
+      "epoch": 2.9349748181309456,
+      "grad_norm": 9.657326698303223,
+      "learning_rate": 2.1824342137760633e-07,
+      "loss": 46.1916,
+      "step": 1639
+    },
+    {
+      "epoch": 2.93676552881925,
+      "grad_norm": 9.459945678710938,
+      "learning_rate": 2.0595474804691038e-07,
+      "loss": 45.4075,
+      "step": 1640
+    },
+    {
+      "epoch": 2.9385562395075544,
+      "grad_norm": 9.631020545959473,
+      "learning_rate": 1.940217859656257e-07,
+      "loss": 45.8027,
+      "step": 1641
+    },
+    {
+      "epoch": 2.940346950195859,
+      "grad_norm": 10.288619041442871,
+      "learning_rate": 1.824445776682504e-07,
+      "loss": 45.495,
+      "step": 1642
+    },
+    {
+      "epoch": 2.942137660884163,
+      "grad_norm": 9.248783111572266,
+      "learning_rate": 1.712231644212081e-07,
+      "loss": 45.5225,
+      "step": 1643
+    },
+    {
+      "epoch": 2.9439283715724676,
+      "grad_norm": 9.824495315551758,
+      "learning_rate": 1.6035758622269247e-07,
+      "loss": 47.1098,
+      "step": 1644
+    },
+    {
+      "epoch": 2.9457190822607724,
+      "grad_norm": 9.428007125854492,
+      "learning_rate": 1.4984788180256726e-07,
+      "loss": 47.0757,
+      "step": 1645
+    },
+    {
+      "epoch": 2.947509792949077,
+      "grad_norm": 9.915323257446289,
+      "learning_rate": 1.3969408862217758e-07,
+      "loss": 46.4815,
+      "step": 1646
+    },
+    {
+      "epoch": 2.949300503637381,
+      "grad_norm": 10.301142692565918,
+      "learning_rate": 1.2989624287425007e-07,
+      "loss": 45.5609,
+      "step": 1647
+    },
+    {
+      "epoch": 2.9510912143256856,
+      "grad_norm": 10.241244316101074,
+      "learning_rate": 1.204543794827595e-07,
+      "loss": 46.7959,
+      "step": 1648
+    },
+    {
+      "epoch": 2.95288192501399,
+      "grad_norm": 10.639913558959961,
+      "learning_rate": 1.1136853210278463e-07,
+      "loss": 46.3414,
+      "step": 1649
+    },
+    {
+      "epoch": 2.9546726357022943,
+      "grad_norm": 10.630585670471191,
+      "learning_rate": 1.0263873312040818e-07,
+      "loss": 45.8505,
+      "step": 1650
+    },
+    {
+      "epoch": 2.9564633463905987,
+      "grad_norm": 10.834024429321289,
+      "learning_rate": 9.426501365259465e-08,
+      "loss": 45.8424,
+      "step": 1651
+    },
+    {
+      "epoch": 2.958254057078903,
+      "grad_norm": 11.740537643432617,
+      "learning_rate": 8.624740354707949e-08,
+      "loss": 45.8525,
+      "step": 1652
+    },
+    {
+      "epoch": 2.9600447677672075,
+      "grad_norm": 10.828522682189941,
+      "learning_rate": 7.85859313822579e-08,
+      "loss": 45.7175,
+      "step": 1653
+    },
+    {
+      "epoch": 2.961835478455512,
+      "grad_norm": 11.889662742614746,
+      "learning_rate": 7.128062446709604e-08,
+      "loss": 45.1248,
+      "step": 1654
+    },
+    {
+      "epoch": 2.9636261891438167,
+      "grad_norm": 12.827479362487793,
+      "learning_rate": 6.433150884102012e-08,
+      "loss": 46.8401,
+      "step": 1655
+    },
+    {
+      "epoch": 2.965416899832121,
+      "grad_norm": 11.234233856201172,
+      "learning_rate": 5.773860927383856e-08,
+      "loss": 45.9078,
+      "step": 1656
+    },
+    {
+      "epoch": 2.9672076105204255,
+      "grad_norm": 11.893027305603027,
+      "learning_rate": 5.150194926561991e-08,
+      "loss": 46.3441,
+      "step": 1657
+    },
+    {
+      "epoch": 2.96899832120873,
+      "grad_norm": 12.164173126220703,
+      "learning_rate": 4.562155104665955e-08,
+      "loss": 45.0211,
+      "step": 1658
+    },
+    {
+      "epoch": 2.9707890318970342,
+      "grad_norm": 11.903395652770996,
+      "learning_rate": 4.009743557736867e-08,
+      "loss": 44.686,
+      "step": 1659
+    },
+    {
+      "epoch": 2.9725797425853386,
+      "grad_norm": 12.025436401367188,
+      "learning_rate": 3.492962254819654e-08,
+      "loss": 44.5997,
+      "step": 1660
+    },
+    {
+      "epoch": 2.974370453273643,
+      "grad_norm": 14.756345748901367,
+      "learning_rate": 3.0118130379575005e-08,
+      "loss": 46.8916,
+      "step": 1661
+    },
+    {
+      "epoch": 2.9761611639619474,
+      "grad_norm": 13.509708404541016,
+      "learning_rate": 2.5662976221840773e-08,
+      "loss": 45.3565,
+      "step": 1662
+    },
+    {
+      "epoch": 2.977951874650252,
+      "grad_norm": 13.354639053344727,
+      "learning_rate": 2.1564175955191e-08,
+      "loss": 44.1853,
+      "step": 1663
+    },
+    {
+      "epoch": 2.979742585338556,
+      "grad_norm": 15.299182891845703,
+      "learning_rate": 1.7821744189605582e-08,
+      "loss": 45.265,
+      "step": 1664
+    },
+    {
+      "epoch": 2.9815332960268606,
+      "grad_norm": 17.097658157348633,
+      "learning_rate": 1.4435694264802735e-08,
+      "loss": 47.0729,
+      "step": 1665
+    },
+    {
+      "epoch": 2.983324006715165,
+      "grad_norm": 20.666465759277344,
+      "learning_rate": 1.1406038250205698e-08,
+      "loss": 45.6816,
+      "step": 1666
+    },
+    {
+      "epoch": 2.9851147174034693,
+      "grad_norm": 35.938072204589844,
+      "learning_rate": 8.732786944887217e-09,
+      "loss": 45.4332,
+      "step": 1667
+    },
+    {
+      "epoch": 2.9869054280917737,
+      "grad_norm": 9.02865982055664,
+      "learning_rate": 6.41594987752514e-09,
+      "loss": 48.6141,
+      "step": 1668
+    },
+    {
+      "epoch": 2.988696138780078,
+      "grad_norm": 8.4238862991333,
+      "learning_rate": 4.45553530638021e-09,
+      "loss": 46.6257,
+      "step": 1669
+    },
+    {
+      "epoch": 2.9904868494683825,
+      "grad_norm": 9.113216400146484,
+      "learning_rate": 2.851550219240551e-09,
+      "loss": 47.1215,
+      "step": 1670
+    },
+    {
+      "epoch": 2.9922775601566873,
+      "grad_norm": 9.337307929992676,
+      "learning_rate": 1.6040003334660825e-09,
+      "loss": 46.2172,
+      "step": 1671
+    },
+    {
+      "epoch": 2.9940682708449917,
+      "grad_norm": 10.280569076538086,
+      "learning_rate": 7.128900958774942e-10,
+      "loss": 46.1396,
+      "step": 1672
+    },
+    {
+      "epoch": 2.995858981533296,
+      "grad_norm": 10.593201637268066,
+      "learning_rate": 1.7822268278955278e-10,
+      "loss": 46.0949,
+      "step": 1673
+    },
+    {
+      "epoch": 2.9976496922216005,
+      "grad_norm": 11.723645210266113,
+      "learning_rate": 0.0,
+      "loss": 45.7927,
+      "step": 1674
     }
   ],
   "logging_steps": 1,
@@ -11302,12 +11820,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.539421652882555e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
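For reference, a minimal sketch (assuming the standard transformers trainer_state.json layout visible in the diff above) of loading the updated state and summarizing the log entries this commit appends for steps 1601-1674:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Top-level fields changed by this commit.
print(state["global_step"], state["epoch"], state["best_metric"])

# Training-log entries appended to "log_history" (eval entries lack "loss").
new = [e for e in state["log_history"] if e.get("step", 0) > 1600 and "loss" in e]
for e in new[-3:]:
    print(e["step"], e["loss"], e["learning_rate"], e["grad_norm"])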