|
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.064,
      "grad_norm": 48.569252014160156,
      "learning_rate": 6.249999999999999e-07,
      "logits/chosen": -1.8510873317718506,
      "logits/rejected": -0.29376277327537537,
      "logps/chosen": -214.10960388183594,
      "logps/rejected": -737.373291015625,
      "loss": 0.6961,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.006722441408783197,
      "rewards/margins": 0.026040678843855858,
      "rewards/rejected": -0.019318239763379097,
      "step": 10
    },
    {
      "epoch": 0.128,
      "grad_norm": 14.449111938476562,
      "learning_rate": 9.979871469976195e-07,
      "logits/chosen": -1.8497416973114014,
      "logits/rejected": -0.2751065790653229,
      "logps/chosen": -240.32241821289062,
      "logps/rejected": -844.6730346679688,
      "loss": 0.4739,
      "rewards/accuracies": 0.9468749761581421,
      "rewards/chosen": 0.029739724472165108,
      "rewards/margins": 0.6849702596664429,
      "rewards/rejected": -0.6552305817604065,
      "step": 20
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.7451657056808472,
      "learning_rate": 9.755282581475767e-07,
      "logits/chosen": -2.152054786682129,
      "logits/rejected": -0.8167506456375122,
      "logps/chosen": -241.89797973632812,
      "logps/rejected": -830.0802612304688,
      "loss": 0.1085,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -0.33556511998176575,
      "rewards/margins": 4.663994312286377,
      "rewards/rejected": -4.99955940246582,
      "step": 30
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.9800211191177368,
      "learning_rate": 9.29224396800933e-07,
      "logits/chosen": -2.6119227409362793,
      "logits/rejected": -1.6330392360687256,
      "logps/chosen": -261.77215576171875,
      "logps/rejected": -951.4508666992188,
      "loss": 0.0372,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -2.5927298069000244,
      "rewards/margins": 14.331648826599121,
      "rewards/rejected": -16.924379348754883,
      "step": 40
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.08055471628904343,
      "learning_rate": 8.613974319136957e-07,
      "logits/chosen": -2.8284103870391846,
      "logits/rejected": -2.016091823577881,
      "logps/chosen": -267.1216735839844,
      "logps/rejected": -1083.85693359375,
      "loss": 0.0246,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -4.838659286499023,
      "rewards/margins": 24.708829879760742,
      "rewards/rejected": -29.547487258911133,
      "step": 50
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.07454250752925873,
      "learning_rate": 7.754484907260512e-07,
      "logits/chosen": -2.8392865657806396,
      "logits/rejected": -2.14508056640625,
      "logps/chosen": -296.3800964355469,
      "logps/rejected": -1137.64892578125,
      "loss": 0.02,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -6.760532379150391,
      "rewards/margins": 29.1066951751709,
      "rewards/rejected": -35.86723327636719,
      "step": 60
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.10226113349199295,
      "learning_rate": 6.756874120406714e-07,
      "logits/chosen": -2.863455295562744,
      "logits/rejected": -2.1374523639678955,
      "logps/chosen": -282.3721008300781,
      "logps/rejected": -1143.27587890625,
      "loss": 0.0304,
      "rewards/accuracies": 0.9874999523162842,
      "rewards/chosen": -6.247722148895264,
      "rewards/margins": 30.036664962768555,
      "rewards/rejected": -36.28438949584961,
      "step": 70
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.06721244752407074,
      "learning_rate": 5.671166329088277e-07,
      "logits/chosen": -2.756829023361206,
      "logits/rejected": -2.045252799987793,
      "logps/chosen": -291.5985412597656,
      "logps/rejected": -1122.9361572265625,
      "loss": 0.019,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -5.791654586791992,
      "rewards/margins": 27.7723331451416,
      "rewards/rejected": -33.563987731933594,
      "step": 80
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.07295263558626175,
      "learning_rate": 4.5518034554828327e-07,
      "logits/chosen": -2.751217842102051,
      "logits/rejected": -2.025172233581543,
      "logps/chosen": -286.55694580078125,
      "logps/rejected": -1081.6651611328125,
      "loss": 0.0353,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -5.1540751457214355,
      "rewards/margins": 25.992502212524414,
      "rewards/rejected": -31.146577835083008,
      "step": 90
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.08157353848218918,
      "learning_rate": 3.454915028125263e-07,
      "logits/chosen": -2.746480703353882,
      "logits/rejected": -1.9662470817565918,
      "logps/chosen": -271.390869140625,
      "logps/rejected": -1062.62109375,
      "loss": 0.0169,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.289627552032471,
      "rewards/margins": 24.77456283569336,
      "rewards/rejected": -29.06418800354004,
      "step": 100
    },
    {
      "epoch": 0.704,
      "grad_norm": 0.1165793165564537,
      "learning_rate": 2.4355036129704696e-07,
      "logits/chosen": -2.7295165061950684,
      "logits/rejected": -1.951841950416565,
      "logps/chosen": -265.2632141113281,
      "logps/rejected": -1057.536376953125,
      "loss": 0.0252,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -4.240163803100586,
      "rewards/margins": 23.72610092163086,
      "rewards/rejected": -27.966266632080078,
      "step": 110
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.06965842097997665,
      "learning_rate": 1.5446867550656767e-07,
      "logits/chosen": -2.7134926319122314,
      "logits/rejected": -1.9151828289031982,
      "logps/chosen": -272.508056640625,
      "logps/rejected": -1058.3094482421875,
      "loss": 0.0668,
      "rewards/accuracies": 0.9906249642372131,
      "rewards/chosen": -4.029051303863525,
      "rewards/margins": 23.791399002075195,
      "rewards/rejected": -27.82044792175293,
      "step": 120
    },
    {
      "epoch": 0.832,
      "grad_norm": 0.06551803648471832,
      "learning_rate": 8.271337313934867e-08,
      "logits/chosen": -2.6546225547790527,
      "logits/rejected": -1.8665531873703003,
      "logps/chosen": -285.0191650390625,
      "logps/rejected": -1041.3951416015625,
      "loss": 0.036,
      "rewards/accuracies": 0.9812500476837158,
      "rewards/chosen": -3.9543297290802,
      "rewards/margins": 22.66595458984375,
      "rewards/rejected": -26.620285034179688,
      "step": 130
    },
    {
      "epoch": 0.896,
      "grad_norm": 0.07770426571369171,
      "learning_rate": 3.188256468013139e-08,
      "logits/chosen": -2.7073161602020264,
      "logits/rejected": -1.8695634603500366,
      "logps/chosen": -265.3280029296875,
      "logps/rejected": -1062.6976318359375,
      "loss": 0.0197,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -3.531825304031372,
      "rewards/margins": 23.626256942749023,
      "rewards/rejected": -27.158079147338867,
      "step": 140
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.06992675364017487,
      "learning_rate": 4.5251191160326495e-09,
      "logits/chosen": -2.6707656383514404,
      "logits/rejected": -1.817920446395874,
      "logps/chosen": -286.8282165527344,
      "logps/rejected": -1117.4290771484375,
      "loss": 0.0668,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -3.840498924255371,
      "rewards/margins": 23.849292755126953,
      "rewards/rejected": -27.68979263305664,
      "step": 150
    },
    {
      "epoch": 0.9984,
      "step": 156,
      "total_flos": 1.1115841451898962e+18,
      "train_loss": 0.10875998093531682,
      "train_runtime": 5515.1168,
      "train_samples_per_second": 0.907,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1115841451898962e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|