{ "best_metric": 8.956038955128788, "best_model_checkpoint": "./checkpoint-1000", "epoch": 66.66666666666667, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.67, "learning_rate": 2.0000000000000003e-06, "loss": 1.3192, "step": 25 }, { "epoch": 1.67, "eval_loss": 1.121928334236145, "eval_runtime": 50.0721, "eval_samples_per_second": 4.773, "eval_steps_per_second": 0.599, "eval_wer": 12.633111859470283, "step": 25 }, { "epoch": 3.33, "learning_rate": 4.5e-06, "loss": 0.8167, "step": 50 }, { "epoch": 3.33, "eval_loss": 0.5812408328056335, "eval_runtime": 51.2787, "eval_samples_per_second": 4.661, "eval_steps_per_second": 0.585, "eval_wer": 11.8594702830618, "step": 50 }, { "epoch": 5.0, "learning_rate": 7e-06, "loss": 0.2917, "step": 75 }, { "epoch": 5.0, "eval_loss": 0.16585290431976318, "eval_runtime": 54.4743, "eval_samples_per_second": 4.387, "eval_steps_per_second": 0.551, "eval_wer": 11.559115318103213, "step": 75 }, { "epoch": 6.67, "learning_rate": 9.5e-06, "loss": 0.102, "step": 100 }, { "epoch": 6.67, "eval_loss": 0.11166360974311829, "eval_runtime": 70.1469, "eval_samples_per_second": 3.407, "eval_steps_per_second": 0.428, "eval_wer": 16.710658050423227, "step": 100 }, { "epoch": 8.33, "learning_rate": 9.777777777777779e-06, "loss": 0.0569, "step": 125 }, { "epoch": 8.33, "eval_loss": 0.10560135543346405, "eval_runtime": 60.3537, "eval_samples_per_second": 3.96, "eval_steps_per_second": 0.497, "eval_wer": 10.76727041048512, "step": 125 }, { "epoch": 10.0, "learning_rate": 9.5e-06, "loss": 0.0357, "step": 150 }, { "epoch": 10.0, "eval_loss": 0.1075899600982666, "eval_runtime": 59.3773, "eval_samples_per_second": 4.025, "eval_steps_per_second": 0.505, "eval_wer": 10.494220442340948, "step": 150 }, { "epoch": 11.67, "learning_rate": 9.222222222222224e-06, "loss": 0.0223, "step": 175 }, { "epoch": 11.67, "eval_loss": 0.1100313812494278, "eval_runtime": 55.1808, "eval_samples_per_second": 4.331, "eval_steps_per_second": 0.544, "eval_wer": 8.591972330936562, "step": 175 }, { "epoch": 13.33, "learning_rate": 8.944444444444446e-06, "loss": 0.0151, "step": 200 }, { "epoch": 13.33, "eval_loss": 0.1135343462228775, "eval_runtime": 55.2247, "eval_samples_per_second": 4.328, "eval_steps_per_second": 0.543, "eval_wer": 9.720578865932465, "step": 200 }, { "epoch": 15.0, "learning_rate": 8.666666666666668e-06, "loss": 0.011, "step": 225 }, { "epoch": 15.0, "eval_loss": 0.11747801303863525, "eval_runtime": 53.1548, "eval_samples_per_second": 4.496, "eval_steps_per_second": 0.564, "eval_wer": 9.820697187585328, "step": 225 }, { "epoch": 16.67, "learning_rate": 8.38888888888889e-06, "loss": 0.0082, "step": 250 }, { "epoch": 16.67, "eval_loss": 0.1196230873465538, "eval_runtime": 53.3776, "eval_samples_per_second": 4.478, "eval_steps_per_second": 0.562, "eval_wer": 9.256393920087376, "step": 250 }, { "epoch": 18.33, "learning_rate": 8.111111111111112e-06, "loss": 0.0064, "step": 275 }, { "epoch": 18.33, "eval_loss": 0.12411758303642273, "eval_runtime": 52.8643, "eval_samples_per_second": 4.521, "eval_steps_per_second": 0.567, "eval_wer": 9.329207244925822, "step": 275 }, { "epoch": 20.0, "learning_rate": 7.833333333333333e-06, "loss": 0.0054, "step": 300 }, { "epoch": 20.0, "eval_loss": 0.12504205107688904, "eval_runtime": 52.7393, "eval_samples_per_second": 4.532, "eval_steps_per_second": 0.569, "eval_wer": 9.511240557021935, "step": 300 }, { "epoch": 21.67, "learning_rate": 7.555555555555556e-06, "loss": 0.0045, "step": 325 }, { "epoch": 21.67, "eval_loss": 0.12675844132900238, "eval_runtime": 52.6746, "eval_samples_per_second": 4.537, "eval_steps_per_second": 0.57, "eval_wer": 9.756985528351688, "step": 325 }, { "epoch": 23.33, "learning_rate": 7.277777777777778e-06, "loss": 0.004, "step": 350 }, { "epoch": 23.33, "eval_loss": 0.12834692001342773, "eval_runtime": 52.8297, "eval_samples_per_second": 4.524, "eval_steps_per_second": 0.568, "eval_wer": 9.747883862746882, "step": 350 }, { "epoch": 25.0, "learning_rate": 7e-06, "loss": 0.0036, "step": 375 }, { "epoch": 25.0, "eval_loss": 0.13014227151870728, "eval_runtime": 52.9358, "eval_samples_per_second": 4.515, "eval_steps_per_second": 0.567, "eval_wer": 9.747883862746882, "step": 375 }, { "epoch": 26.67, "learning_rate": 6.7222222222222235e-06, "loss": 0.0032, "step": 400 }, { "epoch": 26.67, "eval_loss": 0.13117919862270355, "eval_runtime": 52.9358, "eval_samples_per_second": 4.515, "eval_steps_per_second": 0.567, "eval_wer": 9.85710385000455, "step": 400 }, { "epoch": 28.33, "learning_rate": 6.444444444444445e-06, "loss": 0.0029, "step": 425 }, { "epoch": 28.33, "eval_loss": 0.13185520470142365, "eval_runtime": 52.8188, "eval_samples_per_second": 4.525, "eval_steps_per_second": 0.568, "eval_wer": 9.593155547465186, "step": 425 }, { "epoch": 30.0, "learning_rate": 6.166666666666667e-06, "loss": 0.0027, "step": 450 }, { "epoch": 30.0, "eval_loss": 0.13342446088790894, "eval_runtime": 53.6787, "eval_samples_per_second": 4.452, "eval_steps_per_second": 0.559, "eval_wer": 10.002730499681443, "step": 450 }, { "epoch": 31.67, "learning_rate": 5.88888888888889e-06, "loss": 0.0025, "step": 475 }, { "epoch": 31.67, "eval_loss": 0.13465769588947296, "eval_runtime": 53.9247, "eval_samples_per_second": 4.432, "eval_steps_per_second": 0.556, "eval_wer": 10.075543824519889, "step": 475 }, { "epoch": 33.33, "learning_rate": 5.611111111111112e-06, "loss": 0.0023, "step": 500 }, { "epoch": 33.33, "eval_loss": 0.13523031771183014, "eval_runtime": 53.7628, "eval_samples_per_second": 4.445, "eval_steps_per_second": 0.558, "eval_wer": 9.911713843633386, "step": 500 }, { "epoch": 35.0, "learning_rate": 5.333333333333334e-06, "loss": 0.0023, "step": 525 }, { "epoch": 35.0, "eval_loss": 0.135939821600914, "eval_runtime": 53.9196, "eval_samples_per_second": 4.433, "eval_steps_per_second": 0.556, "eval_wer": 9.747883862746882, "step": 525 }, { "epoch": 36.67, "learning_rate": 5.0555555555555555e-06, "loss": 0.0021, "step": 550 }, { "epoch": 36.67, "eval_loss": 0.13721704483032227, "eval_runtime": 52.564, "eval_samples_per_second": 4.547, "eval_steps_per_second": 0.571, "eval_wer": 9.502138891417129, "step": 550 }, { "epoch": 38.33, "learning_rate": 4.777777777777778e-06, "loss": 0.002, "step": 575 }, { "epoch": 38.33, "eval_loss": 0.13715384900569916, "eval_runtime": 52.7245, "eval_samples_per_second": 4.533, "eval_steps_per_second": 0.569, "eval_wer": 9.402020569764268, "step": 575 }, { "epoch": 40.0, "learning_rate": 4.5e-06, "loss": 0.0019, "step": 600 }, { "epoch": 40.0, "eval_loss": 0.13845382630825043, "eval_runtime": 52.9858, "eval_samples_per_second": 4.511, "eval_steps_per_second": 0.566, "eval_wer": 9.356512241740239, "step": 600 }, { "epoch": 41.67, "learning_rate": 4.222222222222223e-06, "loss": 0.0019, "step": 625 }, { "epoch": 41.67, "eval_loss": 0.13895590603351593, "eval_runtime": 53.9436, "eval_samples_per_second": 4.431, "eval_steps_per_second": 0.556, "eval_wer": 9.338308910530627, "step": 625 }, { "epoch": 43.33, "learning_rate": 3.944444444444445e-06, "loss": 0.0018, "step": 650 }, { "epoch": 43.33, "eval_loss": 0.13908667862415314, "eval_runtime": 53.9631, "eval_samples_per_second": 4.429, "eval_steps_per_second": 0.556, "eval_wer": 9.429325566578683, "step": 650 }, { "epoch": 45.0, "learning_rate": 3.6666666666666666e-06, "loss": 0.0018, "step": 675 }, { "epoch": 45.0, "eval_loss": 0.14008642733097076, "eval_runtime": 54.0041, "eval_samples_per_second": 4.426, "eval_steps_per_second": 0.556, "eval_wer": 9.20178392645854, "step": 675 }, { "epoch": 46.67, "learning_rate": 3.3888888888888893e-06, "loss": 0.0017, "step": 700 }, { "epoch": 46.67, "eval_loss": 0.14024624228477478, "eval_runtime": 53.9561, "eval_samples_per_second": 4.43, "eval_steps_per_second": 0.556, "eval_wer": 9.265495585692182, "step": 700 }, { "epoch": 48.33, "learning_rate": 3.1111111111111116e-06, "loss": 0.0016, "step": 725 }, { "epoch": 48.33, "eval_loss": 0.14074519276618958, "eval_runtime": 53.9054, "eval_samples_per_second": 4.434, "eval_steps_per_second": 0.557, "eval_wer": 9.192682260853736, "step": 725 }, { "epoch": 50.0, "learning_rate": 2.8333333333333335e-06, "loss": 0.0016, "step": 750 }, { "epoch": 50.0, "eval_loss": 0.14103808999061584, "eval_runtime": 53.9469, "eval_samples_per_second": 4.43, "eval_steps_per_second": 0.556, "eval_wer": 9.320105579321016, "step": 750 }, { "epoch": 51.67, "learning_rate": 2.5555555555555557e-06, "loss": 0.0016, "step": 775 }, { "epoch": 51.67, "eval_loss": 0.14155486226081848, "eval_runtime": 54.1139, "eval_samples_per_second": 4.417, "eval_steps_per_second": 0.554, "eval_wer": 8.965140620733594, "step": 775 }, { "epoch": 53.33, "learning_rate": 2.277777777777778e-06, "loss": 0.0016, "step": 800 }, { "epoch": 53.33, "eval_loss": 0.14154338836669922, "eval_runtime": 52.6271, "eval_samples_per_second": 4.541, "eval_steps_per_second": 0.57, "eval_wer": 9.147173932829707, "step": 800 }, { "epoch": 55.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0016, "step": 825 }, { "epoch": 55.0, "eval_loss": 0.14192937314510345, "eval_runtime": 53.0224, "eval_samples_per_second": 4.508, "eval_steps_per_second": 0.566, "eval_wer": 8.956038955128788, "step": 825 }, { "epoch": 56.67, "learning_rate": 1.7222222222222224e-06, "loss": 0.0015, "step": 850 }, { "epoch": 56.67, "eval_loss": 0.14206746220588684, "eval_runtime": 52.7981, "eval_samples_per_second": 4.527, "eval_steps_per_second": 0.568, "eval_wer": 9.147173932829707, "step": 850 }, { "epoch": 58.33, "learning_rate": 1.4444444444444445e-06, "loss": 0.0015, "step": 875 }, { "epoch": 58.33, "eval_loss": 0.14239700138568878, "eval_runtime": 53.091, "eval_samples_per_second": 4.502, "eval_steps_per_second": 0.565, "eval_wer": 8.937835623919177, "step": 875 }, { "epoch": 60.0, "learning_rate": 1.1666666666666668e-06, "loss": 0.0015, "step": 900 }, { "epoch": 60.0, "eval_loss": 0.14239582419395447, "eval_runtime": 53.251, "eval_samples_per_second": 4.488, "eval_steps_per_second": 0.563, "eval_wer": 8.883225630290344, "step": 900 }, { "epoch": 61.67, "learning_rate": 8.88888888888889e-07, "loss": 0.0014, "step": 925 }, { "epoch": 61.67, "eval_loss": 0.14283104240894318, "eval_runtime": 53.0384, "eval_samples_per_second": 4.506, "eval_steps_per_second": 0.566, "eval_wer": 8.883225630290344, "step": 925 }, { "epoch": 63.33, "learning_rate": 6.111111111111112e-07, "loss": 0.0014, "step": 950 }, { "epoch": 63.33, "eval_loss": 0.1429101526737213, "eval_runtime": 52.9861, "eval_samples_per_second": 4.511, "eval_steps_per_second": 0.566, "eval_wer": 8.965140620733594, "step": 950 }, { "epoch": 65.0, "learning_rate": 3.3333333333333335e-07, "loss": 0.0015, "step": 975 }, { "epoch": 65.0, "eval_loss": 0.14284275472164154, "eval_runtime": 52.9943, "eval_samples_per_second": 4.51, "eval_steps_per_second": 0.566, "eval_wer": 9.001547283152817, "step": 975 }, { "epoch": 66.67, "learning_rate": 5.555555555555556e-08, "loss": 0.0014, "step": 1000 }, { "epoch": 66.67, "eval_loss": 0.14299413561820984, "eval_runtime": 52.9847, "eval_samples_per_second": 4.511, "eval_steps_per_second": 0.566, "eval_wer": 8.956038955128788, "step": 1000 } ], "max_steps": 1000, "num_train_epochs": 67, "total_flos": 4.13391671525376e+18, "trial_name": null, "trial_params": null }