{ "best_metric": 0.03063076362013817, "best_model_checkpoint": "/workspace/training_output/age-gender-vit-large-patch16-384-v1/checkpoint-12285", "epoch": 5.0, "eval_steps": 500, "global_step": 12285, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2035002035002035, "grad_norm": 0.2408568114042282, "learning_rate": 1.9185999185999185e-05, "loss": 0.1496, "step": 500 }, { "epoch": 0.407000407000407, "grad_norm": 0.0075986310839653015, "learning_rate": 1.8371998371998375e-05, "loss": 0.0806, "step": 1000 }, { "epoch": 0.6105006105006106, "grad_norm": 0.008936968632042408, "learning_rate": 1.755799755799756e-05, "loss": 0.0718, "step": 1500 }, { "epoch": 0.814000814000814, "grad_norm": 0.008683616295456886, "learning_rate": 1.6743996743996746e-05, "loss": 0.084, "step": 2000 }, { "epoch": 1.0, "eval_accuracy": 0.990484429065744, "eval_loss": 0.04229868948459625, "eval_runtime": 319.7158, "eval_samples_per_second": 10.847, "eval_steps_per_second": 1.357, "step": 2457 }, { "epoch": 1.0175010175010175, "grad_norm": 0.016035890206694603, "learning_rate": 1.5929995929995933e-05, "loss": 0.0496, "step": 2500 }, { "epoch": 1.221001221001221, "grad_norm": 0.0016629098681733012, "learning_rate": 1.5115995115995116e-05, "loss": 0.0182, "step": 3000 }, { "epoch": 1.4245014245014245, "grad_norm": 0.004986602813005447, "learning_rate": 1.4301994301994305e-05, "loss": 0.0408, "step": 3500 }, { "epoch": 1.6280016280016278, "grad_norm": 12.01570987701416, "learning_rate": 1.348799348799349e-05, "loss": 0.0297, "step": 4000 }, { "epoch": 1.8315018315018317, "grad_norm": 0.0010504246456548572, "learning_rate": 1.2673992673992674e-05, "loss": 0.0233, "step": 4500 }, { "epoch": 2.0, "eval_accuracy": 0.9927912341407151, "eval_loss": 0.035268500447273254, "eval_runtime": 325.0624, "eval_samples_per_second": 10.669, "eval_steps_per_second": 1.335, "step": 4914 }, { "epoch": 2.035002035002035, "grad_norm": 0.0030849070753902197, "learning_rate": 1.1859991859991862e-05, "loss": 0.0205, "step": 5000 }, { "epoch": 2.2385022385022384, "grad_norm": 0.0012894818792119622, "learning_rate": 1.1045991045991047e-05, "loss": 0.0097, "step": 5500 }, { "epoch": 2.442002442002442, "grad_norm": 0.0003367273311596364, "learning_rate": 1.0231990231990233e-05, "loss": 0.0051, "step": 6000 }, { "epoch": 2.6455026455026456, "grad_norm": 0.0010951660806313157, "learning_rate": 9.417989417989418e-06, "loss": 0.012, "step": 6500 }, { "epoch": 2.849002849002849, "grad_norm": 0.006617186591029167, "learning_rate": 8.603988603988605e-06, "loss": 0.009, "step": 7000 }, { "epoch": 3.0, "eval_accuracy": 0.994232987312572, "eval_loss": 0.040446773171424866, "eval_runtime": 328.2567, "eval_samples_per_second": 10.565, "eval_steps_per_second": 1.322, "step": 7371 }, { "epoch": 3.0525030525030523, "grad_norm": 0.0005593279493041337, "learning_rate": 7.78998778998779e-06, "loss": 0.0122, "step": 7500 }, { "epoch": 3.256003256003256, "grad_norm": 5.2912073442712426e-05, "learning_rate": 6.975986975986977e-06, "loss": 0.0036, "step": 8000 }, { "epoch": 3.4595034595034595, "grad_norm": 0.0004164891433902085, "learning_rate": 6.161986161986162e-06, "loss": 0.0056, "step": 8500 }, { "epoch": 3.663003663003663, "grad_norm": 0.00016138925275299698, "learning_rate": 5.347985347985348e-06, "loss": 0.0095, "step": 9000 }, { "epoch": 3.8665038665038667, "grad_norm": 0.0001516578340670094, "learning_rate": 4.533984533984534e-06, "loss": 0.0056, "step": 9500 }, { "epoch": 4.0, "eval_accuracy": 0.9950980392156863, "eval_loss": 0.031163498759269714, "eval_runtime": 316.2806, "eval_samples_per_second": 10.965, "eval_steps_per_second": 1.372, "step": 9828 }, { "epoch": 4.07000407000407, "grad_norm": 5.3938976634526625e-05, "learning_rate": 3.7199837199837202e-06, "loss": 0.0001, "step": 10000 }, { "epoch": 4.273504273504273, "grad_norm": 0.00023969257017597556, "learning_rate": 2.9059829059829063e-06, "loss": 0.0009, "step": 10500 }, { "epoch": 4.477004477004477, "grad_norm": 0.0021630951669067144, "learning_rate": 2.091982091982092e-06, "loss": 0.0026, "step": 11000 }, { "epoch": 4.68050468050468, "grad_norm": 0.0001431748241884634, "learning_rate": 1.2779812779812782e-06, "loss": 0.0017, "step": 11500 }, { "epoch": 4.884004884004884, "grad_norm": 0.00023103418061509728, "learning_rate": 4.63980463980464e-07, "loss": 0.0, "step": 12000 }, { "epoch": 5.0, "eval_accuracy": 0.9950980392156863, "eval_loss": 0.03063076362013817, "eval_runtime": 325.9677, "eval_samples_per_second": 10.639, "eval_steps_per_second": 1.331, "step": 12285 }, { "epoch": 5.0, "step": 12285, "total_flos": 7.920463730427888e+19, "train_loss": 0.026289946709994166, "train_runtime": 16154.4085, "train_samples_per_second": 6.083, "train_steps_per_second": 0.76 } ], "logging_steps": 500, "max_steps": 12285, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.920463730427888e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }