diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4280 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 150.0, + "eval_steps": 50, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "completion_length": 134.07291666666666, + "epoch": 1.0, + "kl": 0.0, + "learning_rate": 1.9999802608561367e-06, + "loss": -0.0, + "step": 1, + "tallyqa/reward": 1.4866814613342285, + "tallyqa/reward_std": 0.4142478108406067, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.01666666753590107, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.3499999940395355, + "vsr/reward": 1.3259153366088867, + "vsr/reward_std": 0.39282165467739105, + "vsr/rewards/answer_format_reward": 0.1875, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.18543857336044312, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.08749999850988388 + }, + { + "completion_length": 140.89583333333334, + "epoch": 2.0, + "grad_norm": 3.917215347290039, + "kl": 0.001917603115240733, + "learning_rate": 1.9999210442038163e-06, + "loss": 0.0, + "step": 2, + "tallyqa/reward": 1.5069048404693604, + "tallyqa/reward_std": 0.4036765992641449, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0, + "tallyqa/rewards/gpt_score_reward": 0.5249999761581421, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.42500001192092896, + "vsr/reward": 1.4385608434677124, + "vsr/reward_std": 0.3887203335762024, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.708548665046692, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.26250001788139343 + }, + { + "completion_length": 146.26041666666666, + "epoch": 3.0, + "grad_norm": 2.364192485809326, + "kl": 0.003155820071697235, + "learning_rate": 1.9998223523808087e-06, + "loss": 0.0, + "step": 3, + "tallyqa/reward": 1.736838459968567, + "tallyqa/reward_std": 0.4922752380371094, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.005773809738457203, + "tallyqa/rewards/gpt_score_reward": 0.25, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.42500001192092896, + "vsr/reward": 1.537052869796753, + "vsr/reward_std": 0.44190673530101776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0007919103372842073, + "vsr/rewards/gpt_score_reward": 0.125, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.4561836123466492, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.3125000074505806 + }, + { + "completion_length": 163.0, + "epoch": 4.0, + "grad_norm": 1.9837521314620972, + "kl": 0.008118260186165571, + "learning_rate": 1.9996841892832997e-06, + "loss": 0.0001, + "step": 4, + "tallyqa/reward": 1.579048792521159, + "tallyqa/reward_std": 0.3479228417078654, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.01325757623029252, + "tallyqa/rewards/gpt_score_reward": 0.375, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.5854272246360779, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.3333333333333333, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.3333333432674408 + }, + { + "completion_length": 136.875, + "epoch": 5.0, + "grad_norm": 1.9837521314620972, + "kl": 0.010113426949828863, + "learning_rate": 1.9995065603657316e-06, + "loss": 0.0001, + "step": 5, + "tallyqa/reward": 1.8480665683746338, + "tallyqa/reward_std": 0.4355837255716324, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.002083333383779973, + "tallyqa/rewards/gpt_score_reward": 0.32499998807907104, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.9127411246299744, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.30000001192092896, + "vsr/reward": 1.5676078796386719, + "vsr/reward_std": 0.3865228295326233, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.5, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.3499999940395355 + }, + { + "completion_length": 150.47916666666666, + "epoch": 6.0, + "grad_norm": 1.811820149421692, + "kl": 0.014832775729397932, + "learning_rate": 1.999289472640589e-06, + "loss": 0.0001, + "step": 6, + "tallyqa/reward": 1.7139761845270793, + "tallyqa/reward_std": 0.3523900906244914, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0034427609449873366, + "tallyqa/rewards/gpt_score_reward": 0.43333334227403003, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.3333333333333333, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4166666666666667 + }, + { + "completion_length": 143.19791666666666, + "epoch": 7.0, + "grad_norm": 2.507540464401245, + "kl": 0.016154826618731022, + "learning_rate": 1.999032934678125e-06, + "loss": 0.0001, + "step": 7, + "tallyqa/reward": 1.8438603281974792, + "tallyqa/reward_std": 0.35879386961460114, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.009365616017021239, + "tallyqa/rewards/gpt_score_reward": 0.375, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4124999940395355, + "vsr/reward": 1.6270601749420166, + "vsr/reward_std": 0.2692360579967499, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0006756756920367479, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.11149990558624268, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 144.09375, + "epoch": 8.0, + "grad_norm": 1.8922728300094604, + "kl": 0.022366659094889958, + "learning_rate": 1.9987369566060176e-06, + "loss": 0.0002, + "step": 8, + "tallyqa/reward": 1.8740159273147583, + "tallyqa/reward_std": 0.34303292632102966, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.005357143158713977, + "tallyqa/rewards/gpt_score_reward": 0.20833333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 137.60416666666666, + "epoch": 9.0, + "grad_norm": 1.8922728300094604, + "kl": 0.022063078979651134, + "learning_rate": 1.998401550108975e-06, + "loss": 0.0002, + "step": 9, + "tallyqa/reward": 2.040654242038727, + "tallyqa/reward_std": 0.45776721835136414, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.007041396340355277, + "tallyqa/rewards/gpt_score_reward": 0.4749999940395355, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.7488077878952026, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0625, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.9489328861236572, + "vsr/reward_std": 0.36075350642204285, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0074360123835504055, + "vsr/rewards/gpt_score_reward": 0.25, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.8546984195709229, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 140.83333333333334, + "epoch": 10.0, + "grad_norm": 1.9311944246292114, + "kl": 0.019539502449333668, + "learning_rate": 1.9980267284282714e-06, + "loss": 0.0001, + "step": 10, + "tallyqa/reward": 1.736696481704712, + "tallyqa/reward_std": 0.2783554494380951, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0, + "tallyqa/rewards/gpt_score_reward": 0.25, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.3292832374572754, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.946822702884674, + "vsr/reward_std": 0.3713572174310684, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.002211538376286626, + "vsr/rewards/gpt_score_reward": 0.0625, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.434103548526764, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 132.84375, + "epoch": 11.0, + "grad_norm": 1.3641120195388794, + "kl": 0.023106074581543606, + "learning_rate": 1.997612506361225e-06, + "loss": 0.0002, + "step": 11, + "tallyqa/reward": 1.8260396718978882, + "tallyqa/reward_std": 0.3715341240167618, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0062500000931322575, + "tallyqa/rewards/gpt_score_reward": 0.125, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.9330036640167236, + "vsr/reward_std": 0.47951656579971313, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0033730159047991037, + "vsr/rewards/gpt_score_reward": 0.25, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.42500001192092896 + }, + { + "completion_length": 127.0, + "epoch": 12.0, + "grad_norm": 3.5454864501953125, + "kl": 0.02361868942777316, + "learning_rate": 1.997158900260614e-06, + "loss": 0.0002, + "step": 12, + "tallyqa/reward": 1.810607671737671, + "tallyqa/reward_std": 0.2614850401878357, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0035714285913854837, + "tallyqa/rewards/gpt_score_reward": 0.25, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.9114383459091187, + "vsr/reward_std": 0.2458377331495285, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.003075892338529229, + "vsr/rewards/gpt_score_reward": 0.10000000149011612, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.21342733502388, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.54166666666667, + "epoch": 13.0, + "grad_norm": 3.5454864501953125, + "kl": 0.028619478767116863, + "learning_rate": 1.9966659280340295e-06, + "loss": 0.0002, + "step": 13, + "tallyqa/reward": 1.7978215217590332, + "tallyqa/reward_std": 0.31900638341903687, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.02812500111758709, + "tallyqa/rewards/gpt_score_reward": 0.5, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.9265738129615784, + "vsr/reward_std": 0.35742364823818207, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.004976851865649223, + "vsr/rewards/gpt_score_reward": 0.4375, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.7467288970947266, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 112.23958333333333, + "epoch": 14.0, + "grad_norm": 1.607553243637085, + "kl": 0.03216891052822272, + "learning_rate": 1.9961336091431724e-06, + "loss": 0.0002, + "step": 14, + "tallyqa/reward": 1.94424569606781, + "tallyqa/reward_std": 0.2746671736240387, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.01666666753590107, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.041201651096344, + "vsr/reward_std": 0.3417728692293167, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.01623654132708907, + "vsr/rewards/gpt_score_reward": 0.4375, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.0527513325214386, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 97.77083333333333, + "epoch": 15.0, + "grad_norm": 1.6643606424331665, + "kl": 0.04882542540629705, + "learning_rate": 1.99556196460308e-06, + "loss": 0.0004, + "step": 15, + "tallyqa/reward": 2.111328125, + "tallyqa/reward_std": 0.28855831921100616, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.01458333432674408, + "tallyqa/rewards/gpt_score_reward": 0.36250001192092896, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.75, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 1.8041400909423828, + "vsr/reward_std": 0.3709784746170044, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.027272727340459824, + "vsr/rewards/gpt_score_reward": 0.375, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 97.73958333333333, + "epoch": 16.0, + "grad_norm": 1.685754418373108, + "kl": 0.04873800526062647, + "learning_rate": 1.9949510169813e-06, + "loss": 0.0004, + "step": 16, + "tallyqa/reward": 2.0691108107566833, + "tallyqa/reward_std": 0.36812709271907806, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.007638889015652239, + "tallyqa/rewards/gpt_score_reward": 0.1875, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4749999940395355, + "vsr/reward": 2.240570545196533, + "vsr/reward_std": 0.4200352430343628, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.04194078966975212, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 93.73958333333333, + "epoch": 17.0, + "grad_norm": 1.685754418373108, + "kl": 0.0646279901266098, + "learning_rate": 1.9943007903969986e-06, + "loss": 0.0005, + "step": 17, + "tallyqa/reward": 1.951283574104309, + "tallyqa/reward_std": 0.3308020234107971, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0, + "tallyqa/rewards/gpt_score_reward": 0.125, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.1494187116622925, + "vsr/reward_std": 0.45735564827919006, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.01473611081019044, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.7028690576553345, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0625, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 89.39583333333333, + "epoch": 18.0, + "grad_norm": 2.4150192737579346, + "kl": 0.08309249083201091, + "learning_rate": 1.9936113105200084e-06, + "loss": 0.0006, + "step": 18, + "vsr/reward": 2.097624580065409, + "vsr/reward_std": 0.35077253977457684, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.02184782673915227, + "vsr/rewards/gpt_score_reward": 0.3166666626930237, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 78.78125, + "epoch": 19.0, + "grad_norm": 1.714619517326355, + "kl": 0.10424505919218063, + "learning_rate": 1.9928826045698135e-06, + "loss": 0.0008, + "step": 19, + "tallyqa/reward": 2.0189719200134277, + "tallyqa/reward_std": 0.3275202810764313, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.0, + "tallyqa/rewards/gpt_score_reward": 0.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.1255608797073364, + "vsr/reward_std": 0.3386826813220978, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.012500000186264515, + "vsr/rewards/gpt_score_reward": 0.125, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 80.22916666666667, + "epoch": 20.0, + "grad_norm": 2.0060973167419434, + "kl": 0.10539405047893524, + "learning_rate": 1.9921147013144777e-06, + "loss": 0.0008, + "step": 20, + "tallyqa/reward": 2.29019558429718, + "tallyqa/reward_std": 0.3563050925731659, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.037500000558793545, + "tallyqa/rewards/gpt_score_reward": 0.5, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.6433308720588684, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.0249814987182617, + "vsr/reward_std": 0.4223782420158386, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.04479166492819786, + "vsr/rewards/gpt_score_reward": 0.625, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 72.91666666666667, + "epoch": 21.0, + "grad_norm": 2.0060973167419434, + "kl": 0.14449070394039154, + "learning_rate": 1.9913076310695064e-06, + "loss": 0.0011, + "step": 21, + "vsr/reward": 2.299508968989054, + "vsr/reward_std": 0.3977599839369456, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.07604166865348816, + "vsr/rewards/gpt_score_reward": 0.8333333333333334, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.7842435836791992, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.041666666666666664, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 65.36458333333333, + "epoch": 22.0, + "grad_norm": 1.8574655055999756, + "kl": 0.19912627836068472, + "learning_rate": 1.990461425696651e-06, + "loss": 0.0015, + "step": 22, + "tallyqa/reward": 2.53125, + "tallyqa/reward_std": 0.2841745913028717, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.1153552532196045, + "vsr/reward_std": 0.30007025599479675, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.06376488134264946, + "vsr/rewards/gpt_score_reward": 0.7874999940395355, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 66.17708333333333, + "epoch": 23.0, + "grad_norm": 2.039736032485962, + "kl": 0.21003226935863495, + "learning_rate": 1.9895761186026508e-06, + "loss": 0.0016, + "step": 23, + "tallyqa/reward": 2.234821319580078, + "tallyqa/reward_std": 0.2777155637741089, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.02500000037252903, + "tallyqa/rewards/gpt_score_reward": 0.25, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.34020459651947, + "vsr/reward_std": 0.294268436729908, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.07500000111758709, + "vsr/rewards/gpt_score_reward": 0.9375, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 60.697916666666664, + "epoch": 24.0, + "grad_norm": 2.763547897338867, + "kl": 0.2109440714120865, + "learning_rate": 1.988651744737914e-06, + "loss": 0.0016, + "step": 24, + "tallyqa/reward": 2.4329166412353516, + "tallyqa/reward_std": 0.27096378803253174, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000298023224, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.301865816116333, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.332775354385376, + "vsr/reward_std": 0.3359612599015236, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.06250000093132257, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 64.27083333333333, + "epoch": 25.0, + "grad_norm": 2.763547897338867, + "kl": 0.30784689883391064, + "learning_rate": 1.9876883405951377e-06, + "loss": 0.0023, + "step": 25, + "tallyqa/reward": 2.2881696224212646, + "tallyqa/reward_std": 0.31395095586776733, + "tallyqa/rewards/answer_format_reward": 0.375, + "tallyqa/rewards/bleu_score_reward": 0.05000000074505806, + "tallyqa/rewards/gpt_score_reward": 0.5, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.42500001192092896, + "vsr/reward": 2.367447853088379, + "vsr/reward_std": 0.3491152822971344, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0729166679084301, + "vsr/rewards/gpt_score_reward": 0.4375, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 66.20833333333333, + "epoch": 26.0, + "grad_norm": 2.7890965938568115, + "kl": 0.23412353793780008, + "learning_rate": 1.986685944207868e-06, + "loss": 0.0018, + "step": 26, + "tallyqa/reward": 2.4003471533457437, + "tallyqa/reward_std": 0.33405352632204693, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.06666666672875483, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.6666666666666666, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 60.302083333333336, + "epoch": 27.0, + "grad_norm": 2.127520799636841, + "kl": 0.26359472672144574, + "learning_rate": 1.985644595148998e-06, + "loss": 0.002, + "step": 27, + "tallyqa/reward": 2.4453125, + "tallyqa/reward_std": 0.299396276473999, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.2773958444595337, + "vsr/reward_std": 0.2499445527791977, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0024999999441206455, + "vsr/rewards/gpt_score_reward": 0.0625, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.7495747804641724, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0625, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 61.885416666666664, + "epoch": 28.0, + "grad_norm": 2.237720251083374, + "kl": 0.36954864859580994, + "learning_rate": 1.9845643345292055e-06, + "loss": 0.0028, + "step": 28, + "tallyqa/reward": 2.2677083015441895, + "tallyqa/reward_std": 0.3648122251033783, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000111758709, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 56.416666666666664, + "epoch": 29.0, + "grad_norm": 2.237720251083374, + "kl": 0.2990276018778483, + "learning_rate": 1.98344520499533e-06, + "loss": 0.0022, + "step": 29, + "tallyqa/reward": 2.0749998092651367, + "tallyqa/reward_std": 0.20423170924186707, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.634374976158142, + "vsr/reward_std": 0.16133444011211395, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08750000223517418, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 52.270833333333336, + "epoch": 30.0, + "grad_norm": 1.4948984384536743, + "kl": 0.34115341305732727, + "learning_rate": 1.9822872507286887e-06, + "loss": 0.0026, + "step": 30, + "tallyqa/reward": 2.390625, + "tallyqa/reward_std": 0.2204940766096115, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.367968797683716, + "vsr/reward_std": 0.2283666878938675, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08750000223517418, + "vsr/rewards/gpt_score_reward": 0.875, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 51.666666666666664, + "epoch": 31.0, + "grad_norm": 1.7051966190338135, + "kl": 0.38281824191411334, + "learning_rate": 1.981090517443334e-06, + "loss": 0.0029, + "step": 31, + "tallyqa/reward": 2.277083396911621, + "tallyqa/reward_std": 0.36716967821121216, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.4380208253860474, + "vsr/reward_std": 0.16215276718139648, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.75, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0625, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 50.322916666666664, + "epoch": 32.0, + "grad_norm": 2.3981614112854004, + "kl": 0.3478986918926239, + "learning_rate": 1.9798550523842466e-06, + "loss": 0.0026, + "step": 32, + "tallyqa/reward": 2.492968797683716, + "tallyqa/reward_std": 0.2833399772644043, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000111758709, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.214062452316284, + "vsr/reward_std": 0.460480660200119, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 51.333333333333336, + "epoch": 33.0, + "grad_norm": 2.3981614112854004, + "kl": 0.33048046628634137, + "learning_rate": 1.978580904325472e-06, + "loss": 0.0025, + "step": 33, + "tallyqa/reward": 2.3992186784744263, + "tallyqa/reward_std": 0.21000532060861588, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08750000223517418, + "tallyqa/rewards/gpt_score_reward": 0.875, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.5687499046325684, + "vsr/reward_std": 0.09999999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 54.375, + "epoch": 34.0, + "grad_norm": 1.273506999015808, + "kl": 0.3394271930058797, + "learning_rate": 1.9772681235681933e-06, + "loss": 0.0025, + "step": 34, + "tallyqa/reward": 2.4257811307907104, + "tallyqa/reward_std": 0.2706783711910248, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.2948530912399292, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.5859375, + "vsr/reward_std": 0.003125001909211278, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 53.125, + "epoch": 35.0, + "grad_norm": 1.7573214769363403, + "kl": 0.3220557967821757, + "learning_rate": 1.9759167619387473e-06, + "loss": 0.0024, + "step": 35, + "tallyqa/reward": 2.402343511581421, + "tallyqa/reward_std": 0.4065489172935486, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.037500000558793545, + "tallyqa/rewards/gpt_score_reward": 0.375, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.4453125, + "vsr/reward_std": 0.10795938968658447, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -2.0, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 53.166666666666664, + "epoch": 36.0, + "grad_norm": 1.635746955871582, + "kl": 0.3221859534581502, + "learning_rate": 1.974526872786577e-06, + "loss": 0.0024, + "step": 36, + "tallyqa/reward": 2.4602429072062173, + "tallyqa/reward_std": 0.24478513995806375, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.05833333544433117, + "tallyqa/rewards/gpt_score_reward": 0.5833333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 56.322916666666664, + "epoch": 37.0, + "grad_norm": 1.635746955871582, + "kl": 0.3111926217873891, + "learning_rate": 1.9730985109821263e-06, + "loss": 0.0023, + "step": 37, + "tallyqa/reward": 2.84375, + "tallyqa/reward_std": 0.19999998807907104, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000298023224, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.3648436069488525, + "vsr/reward_std": 0.20975451171398163, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.9552147388458252, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 60.1875, + "epoch": 38.0, + "grad_norm": 1.5666120052337646, + "kl": 0.2767863521973292, + "learning_rate": 1.971631732914674e-06, + "loss": 0.0021, + "step": 38, + "tallyqa/reward": 2.5281248092651367, + "tallyqa/reward_std": 0.35392335057258606, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.06250000186264515, + "tallyqa/rewards/gpt_score_reward": 0.625, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.75, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4749999940395355, + "vsr/reward": 2.5015623569488525, + "vsr/reward_std": 0.30288511514663696, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -1.4842302799224854, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.125, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 67.4375, + "epoch": 39.0, + "grad_norm": 1.98360013961792, + "kl": 0.2748785614967346, + "learning_rate": 1.970126596490106e-06, + "loss": 0.0021, + "step": 39, + "tallyqa/reward": 2.663541555404663, + "tallyqa/reward_std": 0.28023363649845123, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.058333334823449455, + "tallyqa/rewards/gpt_score_reward": 0.5833333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.775602658589681, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.475000003973643 + }, + { + "completion_length": 79.33333333333333, + "epoch": 40.0, + "grad_norm": 1.628704309463501, + "kl": 0.25233572721481323, + "learning_rate": 1.968583161128631e-06, + "loss": 0.0019, + "step": 40, + "tallyqa/reward": 2.6973957220713296, + "tallyqa/reward_std": 0.3090879023075104, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.4478542407353718, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 88.52083333333333, + "epoch": 41.0, + "grad_norm": 1.628704309463501, + "kl": 0.2603913148244222, + "learning_rate": 1.967001487762435e-06, + "loss": 0.002, + "step": 41, + "tallyqa/reward": 2.8406248092651367, + "tallyqa/reward_std": 0.2850634232163429, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.2671702951192856, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.862499952316284, + "vsr/reward_std": 0.2535732388496399, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.9750000238418579, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2481217384338379, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 98.89583333333333, + "epoch": 42.0, + "grad_norm": 1.524114727973938, + "kl": 0.2065846472978592, + "learning_rate": 1.9653816388332737e-06, + "loss": 0.0015, + "step": 42, + "vsr/reward": 2.9390624364217124, + "vsr/reward_std": 0.2552626008788745, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.07500000111758709, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.14523913711309433, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4583333333333333, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 104.0625, + "epoch": 43.0, + "grad_norm": 1.210720419883728, + "kl": 0.19888248046239218, + "learning_rate": 1.9637236782900097e-06, + "loss": 0.0015, + "step": 43, + "tallyqa/reward": 2.7804685831069946, + "tallyqa/reward_std": 0.3970213830471039, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000111758709, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7382000982761383, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.6875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4625000059604645, + "vsr/reward": 2.815624713897705, + "vsr/reward_std": 0.19929799437522888, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.11707086116075516, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.25, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 111.625, + "epoch": 44.0, + "grad_norm": 1.5103306770324707, + "kl": 0.21351591249306998, + "learning_rate": 1.962027671586086e-06, + "loss": 0.0016, + "step": 44, + "tallyqa/reward": 2.814392328262329, + "tallyqa/reward_std": 0.30823956926663715, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08333333457509677, + "tallyqa/rewards/gpt_score_reward": 0.8333333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.2254416545232137, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.20833333333333334, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 119.92708333333333, + "epoch": 45.0, + "grad_norm": 1.5103306770324707, + "kl": 0.17639707028865814, + "learning_rate": 1.960293685676943e-06, + "loss": 0.0013, + "step": 45, + "tallyqa/reward": 2.714583396911621, + "tallyqa/reward_std": 0.36242595314979553, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000298023224, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.8148436546325684, + "vsr/reward_std": 0.41014911234378815, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.39355544559657574, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.89583333333333, + "epoch": 46.0, + "grad_norm": 1.2491977214813232, + "kl": 0.20863270262877145, + "learning_rate": 1.9585217890173757e-06, + "loss": 0.0016, + "step": 46, + "tallyqa/reward": 2.906770706176758, + "tallyqa/reward_std": 0.2970550258954366, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.09583333631356557, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.22721587866544724, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5833333333333334, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 116.47916666666667, + "epoch": 47.0, + "grad_norm": 0.8592624068260193, + "kl": 0.1867161045471827, + "learning_rate": 1.9567120515588304e-06, + "loss": 0.0014, + "step": 47, + "tallyqa/reward": 2.6078124046325684, + "tallyqa/reward_std": 0.2866983711719513, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.05000000074505806, + "tallyqa/rewards/gpt_score_reward": 0.5, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0456884503364563, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.42500001192092896, + "vsr/reward": 2.978124976158142, + "vsr/reward_std": 0.2690294533967972, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2888008952140808, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 109.38541666666667, + "epoch": 48.0, + "grad_norm": 0.9890259504318237, + "kl": 0.18424140910307565, + "learning_rate": 1.954864544746643e-06, + "loss": 0.0014, + "step": 48, + "tallyqa/reward": 3.0151735146840415, + "tallyqa/reward_std": 0.2678264578183492, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.09583333383003871, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.4622157762447993, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.75, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 104.85416666666667, + "epoch": 49.0, + "grad_norm": 0.9890259504318237, + "kl": 0.19427449504534403, + "learning_rate": 1.9529793415172188e-06, + "loss": 0.0015, + "step": 49, + "tallyqa/reward": 3.002187490463257, + "tallyqa/reward_std": 0.3972461521625519, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.9179686307907104, + "vsr/reward_std": 0.21781427413225174, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5302943140268326, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 107.22916666666667, + "epoch": 50.0, + "grad_norm": 1.1882508993148804, + "kl": 0.19292579591274261, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.0014, + "step": 50, + "tallyqa/reward": 2.9468748569488525, + "tallyqa/reward_std": 0.3280414640903473, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.06250000186264515, + "tallyqa/rewards/gpt_score_reward": 0.625, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.2930210903286934, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.625, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.4625000059604645, + "vsr/reward": 3.0234375, + "vsr/reward_std": 0.21013911068439484, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.24256396293640137, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/vsr_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/vsr_val.jsonl_runtime": 294.8111, + "eval_../mm-cot-data/vsr_val.jsonl_samples_per_second": 0.977, + "eval_../mm-cot-data/vsr_val.jsonl_steps_per_second": 0.007, + "eval_vsr/reward": 2.7669270038604736, + "eval_vsr/reward_std": 0.427726686000824, + "eval_vsr/rewards/answer_format_reward": 0.5, + "eval_vsr/rewards/bleu_score_reward": 0.07291666666666667, + "eval_vsr/rewards/gpt_score_reward": 0.725000003973643, + "eval_vsr/rewards/grounded_region_bbox_IOU_loss": 0.4593134820461273, + "eval_vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_vsr/rewards/repetitive_reward": 0.5, + "eval_vsr/rewards/think_and_rethink_format_reward": 0.5, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/mme_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mme_val.jsonl_runtime": 384.628, + "eval_../mm-cot-data/mme_val.jsonl_samples_per_second": 0.624, + "eval_../mm-cot-data/mme_val.jsonl_steps_per_second": 0.003, + "eval_mme_count/reward": 2.893749713897705, + "eval_mme_count/reward_std": 0.326983779668808, + "eval_mme_count/rewards/answer_format_reward": 0.5, + "eval_mme_count/rewards/bleu_score_reward": 0.06875000149011612, + "eval_mme_count/rewards/gpt_score_reward": 0.6187499761581421, + "eval_mme_count/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mme_count/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_mme_count/rewards/repetitive_reward": 0.5, + "eval_mme_count/rewards/think_and_rethink_format_reward": 0.5, + "eval_mme_existence/reward": 2.938671827316284, + "eval_mme_existence/reward_std": 0.26649582386016846, + "eval_mme_existence/rewards/answer_format_reward": 0.5, + "eval_mme_existence/rewards/bleu_score_reward": 0.08124999701976776, + "eval_mme_existence/rewards/gpt_score_reward": 0.703125, + "eval_mme_existence/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mme_existence/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_mme_existence/rewards/repetitive_reward": 0.5, + "eval_mme_existence/rewards/think_and_rethink_format_reward": 0.5, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_runtime": 984.5773, + "eval_../mm-cot-data/tallyqa_val.jsonl_samples_per_second": 0.499, + "eval_../mm-cot-data/tallyqa_val.jsonl_steps_per_second": 0.002, + "eval_tallyqa/reward": 2.5256835222244263, + "eval_tallyqa/reward_std": 0.7864454537630081, + "eval_tallyqa/rewards/answer_format_reward": 0.4453125, + "eval_tallyqa/rewards/bleu_score_reward": 0.039062500931322575, + "eval_tallyqa/rewards/gpt_score_reward": 0.390625, + "eval_tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.274828078225255, + "eval_tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.53125, + "eval_tallyqa/rewards/repetitive_reward": 0.5, + "eval_tallyqa/rewards/think_and_rethink_format_reward": 0.4515624940395355, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/gqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/gqa_val.jsonl_runtime": 568.6468, + "eval_../mm-cot-data/gqa_val.jsonl_samples_per_second": 0.895, + "eval_../mm-cot-data/gqa_val.jsonl_steps_per_second": 0.004, + "eval_gqa/reward": 2.732781767845154, + "eval_gqa/reward_std": 0.45518842339515686, + "eval_gqa/rewards/answer_format_reward": 0.4921875, + "eval_gqa/rewards/bleu_score_reward": 0.05010606162250042, + "eval_gqa/rewards/gpt_score_reward": 0.59375, + "eval_gqa/rewards/grounded_region_bbox_IOU_loss": 0.47360797226428986, + "eval_gqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.484375, + "eval_gqa/rewards/repetitive_reward": 0.5, + "eval_gqa/rewards/think_and_rethink_format_reward": 0.4937499985098839, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_runtime": 2013.2214, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_samples_per_second": 0.497, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_steps_per_second": 0.002, + "eval_mathvista_mini/reward": 2.207624912261963, + "eval_mathvista_mini/reward_std": 0.5798305049538612, + "eval_mathvista_mini/rewards/answer_format_reward": 0.48828125, + "eval_mathvista_mini/rewards/bleu_score_reward": 0.01692708340124227, + "eval_mathvista_mini/rewards/gpt_score_reward": 0.5546875, + "eval_mathvista_mini/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mathvista_mini/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.19140625, + "eval_mathvista_mini/rewards/repetitive_reward": 0.4942963421344757, + "eval_mathvista_mini/rewards/think_and_rethink_format_reward": 0.47187499701976776, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_runtime": 895.3095, + "eval_../mm-cot-data/pope_coco_val.jsonl_samples_per_second": 0.558, + "eval_../mm-cot-data/pope_coco_val.jsonl_steps_per_second": 0.002, + "eval_pope_coco/reward": 2.9193357825279236, + "eval_pope_coco/reward_std": 0.28839484602212906, + "eval_pope_coco/rewards/answer_format_reward": 0.4921875, + "eval_pope_coco/rewards/bleu_score_reward": 0.09375, + "eval_pope_coco/rewards/gpt_score_reward": 0.8828125, + "eval_pope_coco/rewards/grounded_region_bbox_IOU_loss": -0.03796842694282532, + "eval_pope_coco/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4453125, + "eval_pope_coco/rewards/repetitive_reward": 0.5, + "eval_pope_coco/rewards/think_and_rethink_format_reward": 0.4937499985098839, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_runtime": 1752.2109, + "eval_../mm-cot-data/ovd_position_val.jsonl_samples_per_second": 1.225, + "eval_../mm-cot-data/ovd_position_val.jsonl_steps_per_second": 0.005, + "eval_ovd_position/reward": 1.9927646342445822, + "eval_ovd_position/reward_std": 0.03613269208546947, + "eval_ovd_position/rewards/answer_format_reward": 0.5, + "eval_ovd_position/rewards/bleu_score_reward": 0.006382761618998998, + "eval_ovd_position/rewards/gpt_score_reward": 0.0, + "eval_ovd_position/rewards/grounded_region_bbox_IOU_loss": 0.3534996790044448, + "eval_ovd_position/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4742647058823529, + "eval_ovd_position/rewards/repetitive_reward": 0.5, + "eval_ovd_position/rewards/think_and_rethink_format_reward": 0.5, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_runtime": 3316.6669, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_samples_per_second": 0.938, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_steps_per_second": 0.004, + "eval_ovd_relationship/reward": 1.9955636215209962, + "eval_ovd_relationship/reward_std": 0.01438146598637104, + "eval_ovd_relationship/rewards/answer_format_reward": 0.5, + "eval_ovd_relationship/rewards/bleu_score_reward": 0.0018125000223517419, + "eval_ovd_relationship/rewards/gpt_score_reward": 0.0, + "eval_ovd_relationship/rewards/grounded_region_bbox_IOU_loss": 0.26859452545642853, + "eval_ovd_relationship/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.49875, + "eval_ovd_relationship/rewards/repetitive_reward": 0.5, + "eval_ovd_relationship/rewards/think_and_rethink_format_reward": 0.5, + "step": 50 + }, + { + "epoch": 50.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_runtime": 728.179, + "eval_../mm-cot-data/ovd_negation_val.jsonl_samples_per_second": 0.915, + "eval_../mm-cot-data/ovd_negation_val.jsonl_steps_per_second": 0.004, + "eval_ovd_negation/reward": 1.9925325314203899, + "eval_ovd_negation/reward_std": 0.030136244371533394, + "eval_ovd_negation/rewards/answer_format_reward": 0.5, + "eval_ovd_negation/rewards/bleu_score_reward": 0.004687500069849193, + "eval_ovd_negation/rewards/gpt_score_reward": 0.0, + "eval_ovd_negation/rewards/grounded_region_bbox_IOU_loss": 0.2705760523676872, + "eval_ovd_negation/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_ovd_negation/rewards/repetitive_reward": 0.5, + "eval_ovd_negation/rewards/think_and_rethink_format_reward": 0.5, + "step": 50 + }, + { + "completion_length": 109.71875, + "epoch": 51.0, + "grad_norm": 0.83548504114151, + "kl": 0.1981164962053299, + "learning_rate": 1.9490961449902947e-06, + "loss": 0.0015, + "step": 51, + "tallyqa/reward": 3.0338540077209473, + "tallyqa/reward_std": 0.18875309824943542, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.8850241303443909, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.980468511581421, + "vsr/reward_std": 0.24132700264453888, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.20237122476100922, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 105.09375, + "epoch": 52.0, + "grad_norm": 0.8869257569313049, + "kl": 0.2088193396727244, + "learning_rate": 1.9470983049947442e-06, + "loss": 0.0016, + "step": 52, + "vsr/reward": 3.009583314259847, + "vsr/reward_std": 0.24639597038427988, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.039166667188207306, + "vsr/rewards/gpt_score_reward": 0.4375, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2489204208056132, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 110.35416666666667, + "epoch": 53.0, + "grad_norm": 0.8869257569313049, + "kl": 0.23700429499149323, + "learning_rate": 1.9450630751798046e-06, + "loss": 0.0018, + "step": 53, + "tallyqa/reward": 3.0851560831069946, + "tallyqa/reward_std": 0.23868683725595474, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.5, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.75, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.7640624046325684, + "vsr/reward_std": 0.4138374626636505, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.272705078125, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 107.85416666666667, + "epoch": 54.0, + "grad_norm": 9.013299942016602, + "kl": 0.21803847452004751, + "learning_rate": 1.9429905358928646e-06, + "loss": 0.0016, + "step": 54, + "tallyqa/reward": 3.0749998092651367, + "tallyqa/reward_std": 0.288369745016098, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.05000000074505806, + "tallyqa/rewards/gpt_score_reward": 0.5, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.724333643913269, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.9828124046325684, + "vsr/reward_std": 0.3215499073266983, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.02083333395421505, + "vsr/rewards/gpt_score_reward": 0.1875, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.16176476515829563, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 111.14583333333333, + "epoch": 55.0, + "grad_norm": 1.8548916578292847, + "kl": 0.21563070515791574, + "learning_rate": 1.9408807689542254e-06, + "loss": 0.0016, + "step": 55, + "vsr/reward": 3.0749998887379966, + "vsr/reward_std": 0.3053816358248393, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5340286294619242, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 108.375, + "epoch": 56.0, + "grad_norm": 0.8850221037864685, + "kl": 0.2062819500764211, + "learning_rate": 1.938733857653874e-06, + "loss": 0.0015, + "step": 56, + "vsr/reward": 3.1467302640279136, + "vsr/reward_std": 0.15365849435329437, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2684437880913417, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4583333333333333, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 105.53125, + "epoch": 57.0, + "grad_norm": 0.8850221037864685, + "kl": 0.23626423378785452, + "learning_rate": 1.936549886748192e-06, + "loss": 0.0018, + "step": 57, + "tallyqa/reward": 3.0248263676961265, + "tallyqa/reward_std": 0.2397108276685079, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08333333457509677, + "tallyqa/rewards/gpt_score_reward": 0.8333333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.7713813384373983, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.75, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 112.39583333333333, + "epoch": 58.0, + "grad_norm": 0.9757654666900635, + "kl": 0.20648842056592306, + "learning_rate": 1.934328942456612e-06, + "loss": 0.0015, + "step": 58, + "tallyqa/reward": 3.0984373092651367, + "tallyqa/reward_std": 0.21508252620697021, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08749999850988388, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.5, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.75, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.107031226158142, + "vsr/reward_std": 0.2786827087402344, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08125000074505806, + "vsr/rewards/gpt_score_reward": 0.824999988079071, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.05188572406768799, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 111.83333333333333, + "epoch": 59.0, + "grad_norm": 0.9088347554206848, + "kl": 0.19422384599844614, + "learning_rate": 1.932071112458211e-06, + "loss": 0.0015, + "step": 59, + "tallyqa/reward": 3.2002604007720947, + "tallyqa/reward_std": 0.3214568644762039, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08750000223517418, + "tallyqa/rewards/gpt_score_reward": 0.875, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.75, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.875, + "vsr/reward_std": 0.26586782932281494, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.875, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5944048762321472, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 110.54166666666667, + "epoch": 60.0, + "grad_norm": 0.8615918755531311, + "kl": 0.1902091900507609, + "learning_rate": 1.929776485888251e-06, + "loss": 0.0014, + "step": 60, + "tallyqa/reward": 3.3140625953674316, + "tallyqa/reward_std": 0.2052331119775772, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08749999850988388, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7931138277053833, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.064895749092102, + "vsr/reward_std": 0.13017258793115616, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6740247011184692, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 112.72916666666667, + "epoch": 61.0, + "grad_norm": 0.8615918755531311, + "kl": 0.19618447124958038, + "learning_rate": 1.927445153334661e-06, + "loss": 0.0015, + "step": 61, + "tallyqa/reward": 3.1524999936421714, + "tallyqa/reward_std": 0.24812235434850058, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08333333457509677, + "tallyqa/rewards/gpt_score_reward": 0.8333333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.28424975275993347, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.6666666666666666, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.07291666666667, + "epoch": 62.0, + "grad_norm": 1.0583548545837402, + "kl": 0.1886474092801412, + "learning_rate": 1.925077206834458e-06, + "loss": 0.0014, + "step": 62, + "tallyqa/reward": 3.224479079246521, + "tallyqa/reward_std": 0.260728120803833, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.18876796960830688, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 2.956249952316284, + "vsr/reward_std": 0.2965421974658966, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2729557752609253, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 115.5625, + "epoch": 63.0, + "grad_norm": 0.6920527219772339, + "kl": 0.20506837467352548, + "learning_rate": 1.9226727398701147e-06, + "loss": 0.0015, + "step": 63, + "tallyqa/reward": 3.1312499046325684, + "tallyqa/reward_std": 0.13608437776565552, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000298023224, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.10513443499803543, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1847654581069946, + "vsr/reward_std": 0.15391971915960312, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.24826128780841827, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 113.98958333333333, + "epoch": 64.0, + "grad_norm": 0.6028052568435669, + "kl": 0.21418521801630655, + "learning_rate": 1.9202318473658702e-06, + "loss": 0.0016, + "step": 64, + "vsr/reward": 3.178298552831014, + "vsr/reward_std": 0.10707634687423706, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.06666666766007741, + "vsr/rewards/gpt_score_reward": 0.6666666666666666, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4225274796287219, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.59375, + "epoch": 65.0, + "grad_norm": 0.6028052568435669, + "kl": 0.19733131925264993, + "learning_rate": 1.917754625683981e-06, + "loss": 0.0015, + "step": 65, + "tallyqa/reward": 3.2390624284744263, + "tallyqa/reward_std": 0.10461002588272095, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000111758709, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.37154003977775574, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0183331966400146, + "vsr/reward_std": 0.12953543663024902, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5306199789047241, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 125.27083333333333, + "epoch": 66.0, + "grad_norm": 0.6111531257629395, + "kl": 0.1905492494503657, + "learning_rate": 1.9152411726209172e-06, + "loss": 0.0014, + "step": 66, + "tallyqa/reward": 3.2249999046325684, + "tallyqa/reward_std": 0.22499999403953552, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.9041621088981628, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.096874952316284, + "vsr/reward_std": 0.16102084144949913, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6150562763214111, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 128.96875, + "epoch": 67.0, + "grad_norm": 0.6444419622421265, + "kl": 0.20456632475058237, + "learning_rate": 1.9126915874035028e-06, + "loss": 0.0015, + "step": 67, + "tallyqa/reward": 3.1965103149414062, + "tallyqa/reward_std": 0.18112562596797943, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.05834583565592766, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2249999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.727705717086792, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.85416666666667, + "epoch": 68.0, + "grad_norm": 0.45766910910606384, + "kl": 0.2020894189675649, + "learning_rate": 1.9101059706849955e-06, + "loss": 0.0015, + "step": 68, + "tallyqa/reward": 3.197916626930237, + "tallyqa/reward_std": 0.1520841345191002, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.37303007021546364, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2124998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.011092867702245712, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 132.96875, + "epoch": 69.0, + "grad_norm": 0.45766910910606384, + "kl": 0.19203470150629678, + "learning_rate": 1.9074844245411166e-06, + "loss": 0.0014, + "step": 69, + "tallyqa/reward": 3.2005207538604736, + "tallyqa/reward_std": 0.228253573179245, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2468749284744263, + "vsr/reward_std": 0.13124999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5732084214687347, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.32291666666667, + "epoch": 70.0, + "grad_norm": 0.7158905267715454, + "kl": 0.20768588781356812, + "learning_rate": 1.9048270524660196e-06, + "loss": 0.0016, + "step": 70, + "tallyqa/reward": 3.221354126930237, + "tallyqa/reward_std": 0.11634461581707001, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.4582935571670532, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2874999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.45152825117111206, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 141.22916666666666, + "epoch": 71.0, + "grad_norm": 0.5657402873039246, + "kl": 0.19900232553482056, + "learning_rate": 1.9021339593682027e-06, + "loss": 0.0015, + "step": 71, + "tallyqa/reward": 3.125, + "tallyqa/reward_std": 0.2435891181230545, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.07500000298023224, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.004886605776846409, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3031249046325684, + "vsr/reward_std": 0.06277695298194885, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4595527648925781, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 125.59375, + "epoch": 72.0, + "grad_norm": 0.3635677099227905, + "kl": 0.20946546892325082, + "learning_rate": 1.899405251566371e-06, + "loss": 0.0016, + "step": 72, + "tallyqa/reward": 3.1968748569488525, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.5683673024177551, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2490885257720947, + "vsr/reward_std": 0.13522883504629135, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6567749679088593, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 132.16666666666666, + "epoch": 73.0, + "grad_norm": 0.3635677099227905, + "kl": 0.20648357272148132, + "learning_rate": 1.896641036785236e-06, + "loss": 0.0015, + "step": 73, + "tallyqa/reward": 3.4124999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.130468726158142, + "vsr/reward_std": 0.15519269555807114, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4224495440721512, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.52083333333333, + "epoch": 74.0, + "grad_norm": 0.6529050469398499, + "kl": 0.19914341469605765, + "learning_rate": 1.8938414241512637e-06, + "loss": 0.0015, + "step": 74, + "tallyqa/reward": 3.2216144800186157, + "tallyqa/reward_std": 0.08702803403139114, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08750000223517418, + "tallyqa/rewards/gpt_score_reward": 0.875, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.40937531273812056, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.268749952316284, + "vsr/reward_std": 0.16249999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.726186215877533, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.84375, + "epoch": 75.0, + "grad_norm": 0.7603000998497009, + "kl": 0.22289642691612244, + "learning_rate": 1.8910065241883678e-06, + "loss": 0.0017, + "step": 75, + "tallyqa/reward": 3.2630207538604736, + "tallyqa/reward_std": 0.29206421971321106, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.5, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.75, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2796874046325684, + "vsr/reward_std": 0.06562499701976776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.987500011920929, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.36691272258758545, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 127.67708333333333, + "epoch": 76.0, + "grad_norm": 0.423773854970932, + "kl": 0.22651845713456473, + "learning_rate": 1.8881364488135445e-06, + "loss": 0.0017, + "step": 76, + "vsr/reward": 3.2097222010294595, + "vsr/reward_std": 0.11747533828020096, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5903219083944956, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.79166666666667, + "epoch": 77.0, + "grad_norm": 0.423773854970932, + "kl": 0.23955331246058145, + "learning_rate": 1.885231311332455e-06, + "loss": 0.0018, + "step": 77, + "tallyqa/reward": 3.2593748569488525, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6535199880599976, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.190885305404663, + "vsr/reward_std": 0.11199093610048294, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.987500011920929, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4085921496152878, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 128.4375, + "epoch": 78.0, + "grad_norm": 0.4916614890098572, + "kl": 0.22349445025126138, + "learning_rate": 1.8822912264349532e-06, + "loss": 0.0017, + "step": 78, + "tallyqa/reward": 3.346874952316284, + "tallyqa/reward_std": 0.0062499940395355225, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6772606372833252, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2626301050186157, + "vsr/reward_std": 0.030661042779684067, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.352586155757308, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 126.60416666666667, + "epoch": 79.0, + "grad_norm": 0.48260095715522766, + "kl": 0.218014528354009, + "learning_rate": 1.879316310190556e-06, + "loss": 0.0016, + "step": 79, + "tallyqa/reward": 3.253124952316284, + "tallyqa/reward_std": 0.09895617142319679, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2874999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5853592157363892, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 137.3125, + "epoch": 80.0, + "grad_norm": 0.50992351770401, + "kl": 0.2321702241897583, + "learning_rate": 1.8763066800438634e-06, + "loss": 0.0017, + "step": 80, + "tallyqa/reward": 3.2406249046325684, + "tallyqa/reward_std": 0.13054219260811806, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6805420815944672, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1499998569488525, + "vsr/reward_std": 0.1417059600353241, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5635287165641785, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.09375, + "epoch": 81.0, + "grad_norm": 0.50992351770401, + "kl": 0.2278223286072413, + "learning_rate": 1.87326245480992e-06, + "loss": 0.0017, + "step": 81, + "tallyqa/reward": 3.3968749046325684, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.217447876930237, + "vsr/reward_std": 0.11298665776848793, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.40822291374206543, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 124.72916666666667, + "epoch": 82.0, + "grad_norm": 0.38082557916641235, + "kl": 0.24678840736548105, + "learning_rate": 1.8701837546695256e-06, + "loss": 0.0019, + "step": 82, + "tallyqa/reward": 3.2874999046325684, + "tallyqa/reward_std": 0.04999999701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.3874140977859497, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.141249895095825, + "vsr/reward_std": 0.16749998927116394, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.07999999821186066, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.06638212502002716, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 115.57291666666667, + "epoch": 83.0, + "grad_norm": 0.5151656270027161, + "kl": 0.22040999432404837, + "learning_rate": 1.86707070116449e-06, + "loss": 0.0017, + "step": 83, + "tallyqa/reward": 3.348828077316284, + "tallyqa/reward_std": 0.0023437440395355225, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.374664768576622, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2124998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.7271878123283386, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 133.69791666666666, + "epoch": 84.0, + "grad_norm": 0.43583944439888, + "kl": 0.22538375357786813, + "learning_rate": 1.863923417192835e-06, + "loss": 0.0017, + "step": 84, + "tallyqa/reward": 3.236979087193807, + "tallyqa/reward_std": 0.08115886896848679, + "tallyqa/rewards/answer_format_reward": 0.4583333333333333, + "tallyqa/rewards/bleu_score_reward": 0.0833333358168602, + "tallyqa/rewards/gpt_score_reward": 0.8333333333333334, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.3307449345787366, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9583333333333334, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.48333332935969037 + }, + { + "completion_length": 123.78125, + "epoch": 85.0, + "grad_norm": 0.43583944439888, + "kl": 0.21668948233127594, + "learning_rate": 1.8607420270039435e-06, + "loss": 0.0016, + "step": 85, + "tallyqa/reward": 3.268749952316284, + "tallyqa/reward_std": 0.16249999403953552, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2796874046325684, + "vsr/reward_std": 0.06562499701976776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08750000223517418, + "vsr/rewards/gpt_score_reward": 0.875, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3363039791584015, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.57291666666667, + "epoch": 86.0, + "grad_norm": 0.5313764810562134, + "kl": 0.21156365672747293, + "learning_rate": 1.8575266561936522e-06, + "loss": 0.0016, + "step": 86, + "tallyqa/reward": 3.4124999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.116618312895298, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.172499895095825, + "vsr/reward_std": 0.12435072660446167, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4129420891404152, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.01041666666667, + "epoch": 87.0, + "grad_norm": 0.43482506275177, + "kl": 0.20092792312304178, + "learning_rate": 1.854277431699295e-06, + "loss": 0.0015, + "step": 87, + "tallyqa/reward": 3.3447914123535156, + "tallyqa/reward_std": 0.003608435858041048, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2890623807907104, + "vsr/reward_std": 0.046875, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.8976896703243256, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 109.77083333333333, + "epoch": 88.0, + "grad_norm": 0.4642576575279236, + "kl": 0.21634343266487122, + "learning_rate": 1.850994481794692e-06, + "loss": 0.0016, + "step": 88, + "tallyqa/reward": 3.299999952316284, + "tallyqa/reward_std": 0.1721687763929367, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.231874942779541, + "vsr/reward_std": 0.03239745274186134, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.41118449717760086, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.66666666666667, + "epoch": 89.0, + "grad_norm": 0.4642576575279236, + "kl": 0.2016658584276835, + "learning_rate": 1.847677936085083e-06, + "loss": 0.0015, + "step": 89, + "tallyqa/reward": 3.2888020277023315, + "tallyqa/reward_std": 0.042581952176988125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.6105802655220032, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.315624952316284, + "vsr/reward_std": 0.06874999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.7328258752822876, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 112.51041666666667, + "epoch": 90.0, + "grad_norm": 0.41234758496284485, + "kl": 0.21652484436829886, + "learning_rate": 1.844327925502015e-06, + "loss": 0.0016, + "step": 90, + "tallyqa/reward": 3.3187499046325684, + "tallyqa/reward_std": 0.0625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.11610224843025208, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1734373569488525, + "vsr/reward_std": 0.015625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3703564256429672, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.38541666666667, + "epoch": 91.0, + "grad_norm": 0.3744339942932129, + "kl": 0.2138026456038157, + "learning_rate": 1.8409445822981691e-06, + "loss": 0.0016, + "step": 91, + "tallyqa/reward": 3.2726560831069946, + "tallyqa/reward_std": 0.01718750037252903, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6925985515117645, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0437498092651367, + "vsr/reward_std": 0.08749999850988388, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.9500000476837158, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.43464595079421997, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.78125, + "epoch": 92.0, + "grad_norm": 0.36091017723083496, + "kl": 0.2092947562535604, + "learning_rate": 1.8375280400421418e-06, + "loss": 0.0016, + "step": 92, + "tallyqa/reward": 3.4749999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0726561546325684, + "vsr/reward_std": 0.08805496990680695, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5414523109793663, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 116.90625, + "epoch": 93.0, + "grad_norm": 0.36091017723083496, + "kl": 0.22336182494958243, + "learning_rate": 1.8340784336131711e-06, + "loss": 0.0017, + "step": 93, + "tallyqa/reward": 3.4124999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6545839309692383, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1249998807907104, + "vsr/reward_std": 0.047358433715999126, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.48750001192092896, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.24646050110459328, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 108.08333333333333, + "epoch": 94.0, + "grad_norm": 0.41118955612182617, + "kl": 0.21704593300819397, + "learning_rate": 1.8305958991958126e-06, + "loss": 0.0016, + "step": 94, + "tallyqa/reward": 3.198281168937683, + "tallyqa/reward_std": 0.07435039430856705, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.08750000223517418, + "tallyqa/rewards/gpt_score_reward": 0.875, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.20550251007080078, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.4124999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.9750000238418579, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6702771782875061, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 110.95833333333333, + "epoch": 95.0, + "grad_norm": 0.4579521715641022, + "kl": 0.19767853617668152, + "learning_rate": 1.8270805742745616e-06, + "loss": 0.0015, + "step": 95, + "tallyqa/reward": 3.328125, + "tallyqa/reward_std": 0.16874998807907104, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2328124046325684, + "vsr/reward_std": 0.033054217929020524, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.297441266477108, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.46875, + "epoch": 96.0, + "grad_norm": 0.49933990836143494, + "kl": 0.21043044328689575, + "learning_rate": 1.8235325976284273e-06, + "loss": 0.0016, + "step": 96, + "tallyqa/reward": 3.375520706176758, + "tallyqa/reward_std": 0.06706424057483673, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6859284043312073, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1953123807907104, + "vsr/reward_std": 0.1093749962747097, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.30130428075790405, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 108.60416666666667, + "epoch": 97.0, + "grad_norm": 0.49933990836143494, + "kl": 0.21387272576491037, + "learning_rate": 1.8199521093254523e-06, + "loss": 0.0016, + "step": 97, + "tallyqa/reward": 3.2593748569488525, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.49231189489364624, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.28515625, + "vsr/reward_std": 0.0651943925768137, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08125000074505806, + "vsr/rewards/gpt_score_reward": 0.9124999940395355, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.1640644297003746, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.4625000059604645 + }, + { + "completion_length": 106.09375, + "epoch": 98.0, + "grad_norm": 0.6564221978187561, + "kl": 0.21401581168174744, + "learning_rate": 1.816339250717184e-06, + "loss": 0.0016, + "step": 98, + "tallyqa/reward": 3.3187499046325684, + "tallyqa/reward_std": 0.0625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.868064820766449, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.221874952316284, + "vsr/reward_std": 0.04999999701976776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2525622621178627, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.79166666666667, + "epoch": 99.0, + "grad_norm": 0.7439166903495789, + "kl": 0.19334849218527475, + "learning_rate": 1.8126941644330937e-06, + "loss": 0.0015, + "step": 99, + "tallyqa/reward": 3.1117186546325684, + "tallyqa/reward_std": 0.14203842356801033, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.6665460467338562, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2937498092651367, + "vsr/reward_std": 0.09828633815050125, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.23116950690746307, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.44999998807907104 + }, + { + "completion_length": 114.79166666666667, + "epoch": 100.0, + "grad_norm": 1.0588696002960205, + "kl": 0.2034647266070048, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.0015, + "step": 100, + "tallyqa/reward": 3.2435762882232666, + "tallyqa/reward_std": 0.10788384079933167, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.06854397058486938, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/vsr_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/vsr_val.jsonl_runtime": 511.4868, + "eval_../mm-cot-data/vsr_val.jsonl_samples_per_second": 0.563, + "eval_../mm-cot-data/vsr_val.jsonl_steps_per_second": 0.004, + "eval_vsr/reward": 2.789583206176758, + "eval_vsr/reward_std": 0.4400843183199565, + "eval_vsr/rewards/answer_format_reward": 0.5, + "eval_vsr/rewards/bleu_score_reward": 0.06770833457509677, + "eval_vsr/rewards/gpt_score_reward": 0.6979166666666666, + "eval_vsr/rewards/grounded_region_bbox_IOU_loss": 0.35983973244826, + "eval_vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4895833333333333, + "eval_vsr/rewards/repetitive_reward": 0.5, + "eval_vsr/rewards/think_and_rethink_format_reward": 0.5, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/mme_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mme_val.jsonl_runtime": 631.5717, + "eval_../mm-cot-data/mme_val.jsonl_samples_per_second": 0.38, + "eval_../mm-cot-data/mme_val.jsonl_steps_per_second": 0.002, + "eval_mme_color/reward": 2.8421874046325684, + "eval_mme_color/reward_std": 0.3939952850341797, + "eval_mme_color/rewards/answer_format_reward": 0.46875, + "eval_mme_color/rewards/bleu_score_reward": 0.08749999850988388, + "eval_mme_color/rewards/gpt_score_reward": 0.875, + "eval_mme_color/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mme_color/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.34375, + "eval_mme_color/rewards/repetitive_reward": 0.5, + "eval_mme_color/rewards/think_and_rethink_format_reward": 0.4749999940395355, + "eval_mme_count/reward": 2.871875047683716, + "eval_mme_count/reward_std": 0.32844018936157227, + "eval_mme_count/rewards/answer_format_reward": 0.5, + "eval_mme_count/rewards/bleu_score_reward": 0.09375, + "eval_mme_count/rewards/gpt_score_reward": 0.9375, + "eval_mme_count/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mme_count/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.40625, + "eval_mme_count/rewards/repetitive_reward": 0.5, + "eval_mme_count/rewards/think_and_rethink_format_reward": 0.48124998807907104, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_runtime": 1045.7197, + "eval_../mm-cot-data/tallyqa_val.jsonl_samples_per_second": 0.47, + "eval_../mm-cot-data/tallyqa_val.jsonl_steps_per_second": 0.002, + "eval_tallyqa/reward": 2.7443360090255737, + "eval_tallyqa/reward_std": 0.7595952749252319, + "eval_tallyqa/rewards/answer_format_reward": 0.5, + "eval_tallyqa/rewards/bleu_score_reward": 0.04687500186264515, + "eval_tallyqa/rewards/gpt_score_reward": 0.484375, + "eval_tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.2274744212627411, + "eval_tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.71875, + "eval_tallyqa/rewards/repetitive_reward": 0.5, + "eval_tallyqa/rewards/think_and_rethink_format_reward": 0.49531249701976776, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/gqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/gqa_val.jsonl_runtime": 903.2116, + "eval_../mm-cot-data/gqa_val.jsonl_samples_per_second": 0.564, + "eval_../mm-cot-data/gqa_val.jsonl_steps_per_second": 0.002, + "eval_gqa/reward": 2.753390312194824, + "eval_gqa/reward_std": 0.4510577619075775, + "eval_gqa/rewards/answer_format_reward": 0.5, + "eval_gqa/rewards/bleu_score_reward": 0.05448106210678816, + "eval_gqa/rewards/gpt_score_reward": 0.620312511920929, + "eval_gqa/rewards/grounded_region_bbox_IOU_loss": 0.4907994493842125, + "eval_gqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_gqa/rewards/repetitive_reward": 0.5, + "eval_gqa/rewards/think_and_rethink_format_reward": 0.5, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_runtime": 2538.4172, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_samples_per_second": 0.394, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_steps_per_second": 0.002, + "eval_mathvista_mini/reward": 2.177223116159439, + "eval_mathvista_mini/reward_std": 0.5470011383295059, + "eval_mathvista_mini/rewards/answer_format_reward": 0.4921875, + "eval_mathvista_mini/rewards/bleu_score_reward": 0.02998367592226714, + "eval_mathvista_mini/rewards/gpt_score_reward": 0.5734374970197678, + "eval_mathvista_mini/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mathvista_mini/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.109375, + "eval_mathvista_mini/rewards/repetitive_reward": 0.49997905641794205, + "eval_mathvista_mini/rewards/think_and_rethink_format_reward": 0.47734374552965164, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_runtime": 1177.9782, + "eval_../mm-cot-data/pope_coco_val.jsonl_samples_per_second": 0.424, + "eval_../mm-cot-data/pope_coco_val.jsonl_steps_per_second": 0.002, + "eval_pope_coco/reward": 2.7193357944488525, + "eval_pope_coco/reward_std": 0.437619224190712, + "eval_pope_coco/rewards/answer_format_reward": 0.5, + "eval_pope_coco/rewards/bleu_score_reward": 0.08437500149011612, + "eval_pope_coco/rewards/gpt_score_reward": 0.8140624910593033, + "eval_pope_coco/rewards/grounded_region_bbox_IOU_loss": -0.556345634162426, + "eval_pope_coco/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.3125, + "eval_pope_coco/rewards/repetitive_reward": 0.5, + "eval_pope_coco/rewards/think_and_rethink_format_reward": 0.5, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_runtime": 2963.9466, + "eval_../mm-cot-data/ovd_position_val.jsonl_samples_per_second": 0.724, + "eval_../mm-cot-data/ovd_position_val.jsonl_steps_per_second": 0.003, + "eval_ovd_position/reward": 2.000275738099042, + "eval_ovd_position/reward_std": 0.021751368281376714, + "eval_ovd_position/rewards/answer_format_reward": 0.5, + "eval_ovd_position/rewards/bleu_score_reward": 0.006985294303911573, + "eval_ovd_position/rewards/gpt_score_reward": 0.0, + "eval_ovd_position/rewards/grounded_region_bbox_IOU_loss": 0.37093065854381113, + "eval_ovd_position/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.49264705882352944, + "eval_ovd_position/rewards/repetitive_reward": 0.5, + "eval_ovd_position/rewards/think_and_rethink_format_reward": 0.5, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_runtime": 5476.0463, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_samples_per_second": 0.568, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_steps_per_second": 0.002, + "eval_ovd_relationship/reward": 1.995802354812622, + "eval_ovd_relationship/reward_std": 0.015050972551107407, + "eval_ovd_relationship/rewards/answer_format_reward": 0.5, + "eval_ovd_relationship/rewards/bleu_score_reward": 0.0017545454992796295, + "eval_ovd_relationship/rewards/gpt_score_reward": 0.0, + "eval_ovd_relationship/rewards/grounded_region_bbox_IOU_loss": 0.2541656565666199, + "eval_ovd_relationship/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.49375, + "eval_ovd_relationship/rewards/repetitive_reward": 0.5, + "eval_ovd_relationship/rewards/think_and_rethink_format_reward": 0.49924999952316285, + "step": 100 + }, + { + "epoch": 100.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_runtime": 1292.2969, + "eval_../mm-cot-data/ovd_negation_val.jsonl_samples_per_second": 0.515, + "eval_../mm-cot-data/ovd_negation_val.jsonl_steps_per_second": 0.002, + "eval_ovd_negation/reward": 1.9987350304921467, + "eval_ovd_negation/reward_std": 0.019836039748042822, + "eval_ovd_negation/rewards/answer_format_reward": 0.5, + "eval_ovd_negation/rewards/bleu_score_reward": 0.005208333469151209, + "eval_ovd_negation/rewards/gpt_score_reward": 0.0, + "eval_ovd_negation/rewards/grounded_region_bbox_IOU_loss": 0.2841632862885793, + "eval_ovd_negation/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4947916666666667, + "eval_ovd_negation/rewards/repetitive_reward": 0.5, + "eval_ovd_negation/rewards/think_and_rethink_format_reward": 0.5, + "step": 100 + }, + { + "completion_length": 110.86458333333333, + "epoch": 101.0, + "grad_norm": 1.0588696002960205, + "kl": 0.19113564491271973, + "learning_rate": 1.8053078857111217e-06, + "loss": 0.0014, + "step": 101, + "tallyqa/reward": 3.3562499284744263, + "tallyqa/reward_std": 0.04999999701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7649878859519958, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0718748569488525, + "vsr/reward_std": 0.03125, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5893831253051758, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.51041666666667, + "epoch": 102.0, + "grad_norm": 0.4080360531806946, + "kl": 0.19897079964478812, + "learning_rate": 1.8015669848708766e-06, + "loss": 0.0015, + "step": 102, + "tallyqa/reward": 3.1781249046325684, + "tallyqa/reward_std": 0.11874999105930328, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 0.75, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3132811784744263, + "vsr/reward_std": 0.11283114925026894, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5049644559621811, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.77083333333333, + "epoch": 103.0, + "grad_norm": 0.5803070664405823, + "kl": 0.18610897660255432, + "learning_rate": 1.7977944395385709e-06, + "loss": 0.0014, + "step": 103, + "vsr/reward": 3.188541571299235, + "vsr/reward_std": 0.13077812641859055, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.058333334823449455, + "vsr/rewards/gpt_score_reward": 0.5833333333333334, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.14525225448111692, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 128.05208333333334, + "epoch": 104.0, + "grad_norm": 0.32353171706199646, + "kl": 0.18469294408957163, + "learning_rate": 1.7939903986478354e-06, + "loss": 0.0014, + "step": 104, + "tallyqa/reward": 3.2734373807907104, + "tallyqa/reward_std": 0.015625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.05809243023395538, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3343749046325684, + "vsr/reward_std": 0.15625, + "vsr/rewards/answer_format_reward": 0.375, + "vsr/rewards/bleu_score_reward": 0.07500000298023224, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": -0.37616467475891113, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.375, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.4000000059604645 + }, + { + "completion_length": 115.32291666666667, + "epoch": 105.0, + "grad_norm": 0.32353171706199646, + "kl": 0.19192602733771005, + "learning_rate": 1.7901550123756903e-06, + "loss": 0.0014, + "step": 105, + "tallyqa/reward": 3.3359373807907104, + "tallyqa/reward_std": 0.14198118448257446, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.677470326423645, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1499998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.039557524025440216, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 116.0, + "epoch": 106.0, + "grad_norm": 0.5021659731864929, + "kl": 0.19098818798859915, + "learning_rate": 1.7862884321366187e-06, + "loss": 0.0014, + "step": 106, + "tallyqa/reward": 3.2218748331069946, + "tallyqa/reward_std": 0.05625000223517418, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.9462404400110245, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3020832538604736, + "vsr/reward_std": 0.09583333134651184, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.09166666865348816, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.13521653413772583, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 113.86458333333333, + "epoch": 107.0, + "grad_norm": 0.656090259552002, + "kl": 0.19679838915665945, + "learning_rate": 1.7823908105765878e-06, + "loss": 0.0015, + "step": 107, + "tallyqa/reward": 3.2473957538604736, + "tallyqa/reward_std": 0.0677083320915699, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7001632899045944, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.315624952316284, + "vsr/reward_std": 0.06874999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5582557916641235, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.04166666666667, + "epoch": 108.0, + "grad_norm": 0.5133208632469177, + "kl": 0.18495462834835052, + "learning_rate": 1.7784623015670235e-06, + "loss": 0.0014, + "step": 108, + "tallyqa/reward": 3.4343748092651367, + "tallyqa/reward_std": 0.08125000447034836, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7513253092765808, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.264843702316284, + "vsr/reward_std": 0.03281250037252903, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.8467094302177429, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.75, + "epoch": 109.0, + "grad_norm": 0.5133208632469177, + "kl": 0.1943558802207311, + "learning_rate": 1.7745030601987336e-06, + "loss": 0.0015, + "step": 109, + "tallyqa/reward": 3.3499999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.8520041704177856, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3109374046325684, + "vsr/reward_std": 0.001804217929020524, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.505576953291893, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.04166666666667, + "epoch": 110.0, + "grad_norm": 0.1971115618944168, + "kl": 0.18835549553235373, + "learning_rate": 1.7705132427757892e-06, + "loss": 0.0014, + "step": 110, + "tallyqa/reward": 3.3906248807907104, + "tallyqa/reward_std": 0.04374999925494194, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6745314598083496, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.140625, + "vsr/reward_std": 0.01875000260770321, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.1328786015510559, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.42500001192092896 + }, + { + "completion_length": 119.97916666666667, + "epoch": 111.0, + "grad_norm": 0.5011820793151855, + "kl": 0.18590077757835388, + "learning_rate": 1.7664930068093497e-06, + "loss": 0.0014, + "step": 111, + "tallyqa/reward": 3.3343749046325684, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.12083332985639572, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.192447781562805, + "vsr/reward_std": 0.11305496096611023, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08958333730697632, + "vsr/rewards/gpt_score_reward": 0.8125, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5099807232618332, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 113.95833333333333, + "epoch": 112.0, + "grad_norm": 0.47968146204948425, + "kl": 0.1924248238404592, + "learning_rate": 1.7624425110114479e-06, + "loss": 0.0014, + "step": 112, + "tallyqa/reward": 3.214062452316284, + "tallyqa/reward_std": 0.17738711833953857, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.7033195197582245, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2874999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.7298367023468018, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.85416666666667, + "epoch": 113.0, + "grad_norm": 0.47968146204948425, + "kl": 0.19297024110953012, + "learning_rate": 1.758361915288722e-06, + "loss": 0.0014, + "step": 113, + "tallyqa/reward": 3.4109373092651367, + "tallyqa/reward_std": 0.003125001909211278, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.9231950640678406, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2343748807907104, + "vsr/reward_std": 0.03125, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5069393962621689, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 114.16666666666667, + "epoch": 114.0, + "grad_norm": 0.25086575746536255, + "kl": 0.19972271720568338, + "learning_rate": 1.7542513807361037e-06, + "loss": 0.0015, + "step": 114, + "tallyqa/reward": 3.4749999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2046873569488525, + "vsr/reward_std": 0.09062498807907104, + "vsr/rewards/answer_format_reward": 0.4375, + "vsr/rewards/bleu_score_reward": 0.08750000223517418, + "vsr/rewards/gpt_score_reward": 0.875, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3882710188627243, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.4625000059604645 + }, + { + "completion_length": 118.4375, + "epoch": 115.0, + "grad_norm": 0.45391425490379333, + "kl": 0.209316556652387, + "learning_rate": 1.7501110696304595e-06, + "loss": 0.0016, + "step": 115, + "tallyqa/reward": 3.2677082220713296, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 0.9916666746139526, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.2110761602719625, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9583333333333334, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.375, + "epoch": 116.0, + "grad_norm": 0.3342527747154236, + "kl": 0.19272646307945251, + "learning_rate": 1.7459411454241822e-06, + "loss": 0.0014, + "step": 116, + "tallyqa/reward": 3.132499933242798, + "tallyqa/reward_std": 0.1599999964237213, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.8817993998527527, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3734374046325684, + "vsr/reward_std": 0.015625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6548198759555817, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.54166666666667, + "epoch": 117.0, + "grad_norm": 0.3342527747154236, + "kl": 0.20520279804865518, + "learning_rate": 1.741741772738739e-06, + "loss": 0.0015, + "step": 117, + "tallyqa/reward": 3.190624952316284, + "tallyqa/reward_std": 0.11874999850988388, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.5759168863296509, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.284374952316284, + "vsr/reward_std": 0.13124999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4034843146800995, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 127.36458333333333, + "epoch": 118.0, + "grad_norm": 0.3912048041820526, + "kl": 0.1980737845102946, + "learning_rate": 1.737513117358174e-06, + "loss": 0.0015, + "step": 118, + "tallyqa/reward": 3.4374998807907104, + "tallyqa/reward_std": 0.13750000298023224, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.4644365534186363, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0249998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5707648992538452, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 130.84375, + "epoch": 119.0, + "grad_norm": 0.28441131114959717, + "kl": 0.18950150907039642, + "learning_rate": 1.73325534622256e-06, + "loss": 0.0014, + "step": 119, + "tallyqa/reward": 3.2484374046325684, + "tallyqa/reward_std": 0.12812499701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.6691771447658539, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0874998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.25003159046173096, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.07291666666667, + "epoch": 120.0, + "grad_norm": 0.30278730392456055, + "kl": 0.1873536854982376, + "learning_rate": 1.7289686274214115e-06, + "loss": 0.0014, + "step": 120, + "tallyqa/reward": 3.2812498807907104, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.3345661163330078, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3187499046325684, + "vsr/reward_std": 0.0625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5565975904464722, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 130.32291666666666, + "epoch": 121.0, + "grad_norm": 0.30278730392456055, + "kl": 0.20069065193335214, + "learning_rate": 1.7246531301870467e-06, + "loss": 0.0015, + "step": 121, + "tallyqa/reward": 3.2124998569488525, + "tallyqa/reward_std": 0.125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7086124420166016, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2249999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2614099681377411, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 123.98958333333333, + "epoch": 122.0, + "grad_norm": 0.2798503637313843, + "kl": 0.2033767948547999, + "learning_rate": 1.720309024887907e-06, + "loss": 0.0015, + "step": 122, + "tallyqa/reward": 3.443229079246521, + "tallyqa/reward_std": 0.06354166567325592, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.33522486686706543, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0874998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6440528631210327, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 127.78125, + "epoch": 123.0, + "grad_norm": 0.49700167775154114, + "kl": 0.19741332530975342, + "learning_rate": 1.715936483021831e-06, + "loss": 0.0015, + "step": 123, + "tallyqa/reward": 3.3984373807907104, + "tallyqa/reward_std": 0.015625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.734128475189209, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1114583015441895, + "vsr/reward_std": 0.10208332538604736, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.8563366532325745, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 125.34375, + "epoch": 124.0, + "grad_norm": 1.2107746601104736, + "kl": 0.18995157877604166, + "learning_rate": 1.7115356772092855e-06, + "loss": 0.0014, + "step": 124, + "tallyqa/reward": 3.2437498569488525, + "tallyqa/reward_std": 0.07499999832361937, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.38062700629234314, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3187499046325684, + "vsr/reward_std": 0.1875, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.29582586884498596, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 129.57291666666666, + "epoch": 125.0, + "grad_norm": 1.2107746601104736, + "kl": 0.20850025117397308, + "learning_rate": 1.7071067811865474e-06, + "loss": 0.0016, + "step": 125, + "tallyqa/reward": 3.3812499046325684, + "tallyqa/reward_std": 0.0625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.11135390400886536, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2468749284744263, + "vsr/reward_std": 0.06874999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4343206137418747, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.75, + "epoch": 126.0, + "grad_norm": 0.5225059390068054, + "kl": 0.19507964452107748, + "learning_rate": 1.7026499697988492e-06, + "loss": 0.0015, + "step": 126, + "vsr/reward": 3.2718749046325684, + "vsr/reward_std": 0.06458333134651184, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.06666666766007741, + "vsr/rewards/gpt_score_reward": 0.6666666666666666, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.15802291159828505, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 125.25, + "epoch": 127.0, + "grad_norm": 0.2779540717601776, + "kl": 0.2046880175669988, + "learning_rate": 1.6981654189934727e-06, + "loss": 0.0015, + "step": 127, + "vsr/reward": 3.2208332220713296, + "vsr/reward_std": 0.03333333134651184, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4035410135984421, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.70833333333333, + "epoch": 128.0, + "grad_norm": 0.5138023495674133, + "kl": 0.19684399664402008, + "learning_rate": 1.6936533058128049e-06, + "loss": 0.0015, + "step": 128, + "tallyqa/reward": 3.4546873569488525, + "tallyqa/reward_std": 0.03723391145467758, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6251498460769653, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1796873807907104, + "vsr/reward_std": 0.015625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.05000000074505806, + "vsr/rewards/gpt_score_reward": 0.5, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3518461808562279, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.34375, + "epoch": 129.0, + "grad_norm": 0.5138023495674133, + "kl": 0.19006852308909097, + "learning_rate": 1.6891138083873483e-06, + "loss": 0.0014, + "step": 129, + "vsr/reward": 3.276041587193807, + "vsr/reward_std": 0.05624999602635702, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.06666666766007741, + "vsr/rewards/gpt_score_reward": 0.6666666666666666, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4575161635875702, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 126.51041666666667, + "epoch": 130.0, + "grad_norm": 0.20300109684467316, + "kl": 0.18751830359299979, + "learning_rate": 1.6845471059286886e-06, + "loss": 0.0014, + "step": 130, + "tallyqa/reward": 3.3765623569488525, + "tallyqa/reward_std": 0.07187499850988388, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.30413326621055603, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.9375, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.0718748569488525, + "vsr/reward_std": 0.03125, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.18568988144397736, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.91666666666667, + "epoch": 131.0, + "grad_norm": 0.4048379063606262, + "kl": 0.1999462495247523, + "learning_rate": 1.6799533787224192e-06, + "loss": 0.0015, + "step": 131, + "tallyqa/reward": 3.2734373807907104, + "tallyqa/reward_std": 0.015625, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.3926572762429714, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3187499046325684, + "vsr/reward_std": 0.0625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2874980866909027, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 126.66666666666667, + "epoch": 132.0, + "grad_norm": 0.1570003777742386, + "kl": 0.18750069538752237, + "learning_rate": 1.6753328081210244e-06, + "loss": 0.0014, + "step": 132, + "tallyqa/reward": 3.3124998807907104, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.9433907829225063, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.315624952316284, + "vsr/reward_std": 0.06874999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.7628442049026489, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 130.45833333333334, + "epoch": 133.0, + "grad_norm": 0.1570003777742386, + "kl": 0.17730513215065002, + "learning_rate": 1.6706855765367198e-06, + "loss": 0.0013, + "step": 133, + "tallyqa/reward": 3.2265623807907104, + "tallyqa/reward_std": 0.046875, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.6473097205162048, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.299999952316284, + "vsr/reward_std": 0.09999999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.07500000298023224, + "vsr/rewards/gpt_score_reward": 0.75, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.13092964887619019, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 142.13541666666666, + "epoch": 134.0, + "grad_norm": 0.5272983312606812, + "kl": 0.18585281570752463, + "learning_rate": 1.6660118674342515e-06, + "loss": 0.0014, + "step": 134, + "tallyqa/reward": 3.2468749284744263, + "tallyqa/reward_std": 0.0062499940395355225, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.5467409193515778, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3656249046325684, + "vsr/reward_std": 0.09375, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5660097599029541, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 128.60416666666666, + "epoch": 135.0, + "grad_norm": 0.3489927053451538, + "kl": 0.19390711188316345, + "learning_rate": 1.6613118653236517e-06, + "loss": 0.0015, + "step": 135, + "tallyqa/reward": 3.3874999284744263, + "tallyqa/reward_std": 0.11249999701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.29660436511039734, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1343748569488525, + "vsr/reward_std": 0.03125, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.37586772441864014, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 129.72916666666666, + "epoch": 136.0, + "grad_norm": 0.419140100479126, + "kl": 0.198059673110644, + "learning_rate": 1.6565857557529564e-06, + "loss": 0.0015, + "step": 136, + "tallyqa/reward": 3.4124999046325684, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.6311134099960327, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.231510281562805, + "vsr/reward_std": 0.03593749552965164, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.08958333730697632, + "vsr/rewards/gpt_score_reward": 0.9124999940395355, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3094787746667862, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 127.375, + "epoch": 137.0, + "grad_norm": 0.419140100479126, + "kl": 0.20155884325504303, + "learning_rate": 1.6518337253008787e-06, + "loss": 0.0015, + "step": 137, + "tallyqa/reward": 3.378124952316284, + "tallyqa/reward_std": 0.06874999403953552, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.8315246105194092, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2265623807907104, + "vsr/reward_std": 0.046875, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 0.875, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5565094649791718, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.9375, + "epoch": 138.0, + "grad_norm": 0.39033976197242737, + "kl": 0.20345265169938406, + "learning_rate": 1.6470559615694445e-06, + "loss": 0.0015, + "step": 138, + "tallyqa/reward": 3.140625, + "tallyqa/reward_std": 0.14374999701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -2.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3406249284744263, + "vsr/reward_std": 0.01874999701976776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6503796577453613, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 120.10416666666667, + "epoch": 139.0, + "grad_norm": 0.506568431854248, + "kl": 0.19735519091288248, + "learning_rate": 1.6422526531765844e-06, + "loss": 0.0015, + "step": 139, + "vsr/reward": 3.2093749046325684, + "vsr/reward_std": 0.07664643973112106, + "vsr/rewards/answer_format_reward": 0.4583333333333333, + "vsr/rewards/bleu_score_reward": 0.09166666865348816, + "vsr/rewards/gpt_score_reward": 0.9166666666666666, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5594897270202637, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 118.66666666666667, + "epoch": 140.0, + "grad_norm": 0.9508801102638245, + "kl": 0.19447438418865204, + "learning_rate": 1.6374239897486897e-06, + "loss": 0.0015, + "step": 140, + "tallyqa/reward": 3.2218748728434243, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -1.035646637280782, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 122.63541666666667, + "epoch": 141.0, + "grad_norm": 0.9508801102638245, + "kl": 0.20372503995895386, + "learning_rate": 1.6325701619131245e-06, + "loss": 0.0015, + "step": 141, + "tallyqa/reward": 3.2578123807907104, + "tallyqa/reward_std": 0.046875, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": -0.9976717112585902, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3187499046325684, + "vsr/reward_std": 0.0625, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.27744948863983154, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 128.95833333333334, + "epoch": 142.0, + "grad_norm": 0.5632538795471191, + "kl": 0.19162590305010477, + "learning_rate": 1.6276913612907004e-06, + "loss": 0.0014, + "step": 142, + "tallyqa/reward": 3.3812499046325684, + "tallyqa/reward_std": 0.1875, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7168771028518677, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2570310831069946, + "vsr/reward_std": 0.04843750223517418, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5823599249124527, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 121.54166666666667, + "epoch": 143.0, + "grad_norm": 0.44865813851356506, + "kl": 0.18490644792715707, + "learning_rate": 1.6227877804881126e-06, + "loss": 0.0014, + "step": 143, + "vsr/reward": 3.2374998728434243, + "vsr/reward_std": 0.041666666666666664, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.6260648965835571, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 124.53125, + "epoch": 144.0, + "grad_norm": 0.4362785220146179, + "kl": 0.192943145831426, + "learning_rate": 1.6178596130903343e-06, + "loss": 0.0014, + "step": 144, + "tallyqa/reward": 3.2328124046325684, + "tallyqa/reward_std": 0.09687499701976776, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.12041008472442627, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.875, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.299999952316284, + "vsr/reward_std": 0.09999999403953552, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.2514481246471405, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 115.77083333333333, + "epoch": 145.0, + "grad_norm": 0.4362785220146179, + "kl": 0.18801326056321463, + "learning_rate": 1.6129070536529765e-06, + "loss": 0.0014, + "step": 145, + "tallyqa/reward": 3.3124998807907104, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.3499999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.4703597128391266, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.67708333333333, + "epoch": 146.0, + "grad_norm": 0.30028992891311646, + "kl": 0.21623035271962485, + "learning_rate": 1.6079302976946053e-06, + "loss": 0.0016, + "step": 146, + "tallyqa/reward": 3.2124998569488525, + "tallyqa/reward_std": 0.0, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.8870890140533447, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2265623807907104, + "vsr/reward_std": 0.046875, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.5887175276875496, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.8125, + "epoch": 147.0, + "grad_norm": 0.24446983635425568, + "kl": 0.20485804478327432, + "learning_rate": 1.6029295416890247e-06, + "loss": 0.0015, + "step": 147, + "tallyqa/reward": 3.2468749284744263, + "tallyqa/reward_std": 0.06874999403953552, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.37906908988952637, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1499998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.0, + "vsr/rewards/gpt_score_reward": 0.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.0025195025373250246, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 114.97916666666667, + "epoch": 148.0, + "grad_norm": 0.2216375768184662, + "kl": 0.19786379237969717, + "learning_rate": 1.5979049830575188e-06, + "loss": 0.0015, + "step": 148, + "tallyqa/reward": 3.2968748807907104, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.4371739625930786, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2874999046325684, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.3196384906768799, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 126.04166666666667, + "epoch": 149.0, + "grad_norm": 0.2216375768184662, + "kl": 0.19341517488161722, + "learning_rate": 1.5928568201610592e-06, + "loss": 0.0015, + "step": 149, + "tallyqa/reward": 3.3968749046325684, + "tallyqa/reward_std": 0.03125, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.7688641548156738, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.1499998569488525, + "vsr/reward_std": 0.0, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.769155740737915, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "completion_length": 117.63541666666667, + "epoch": 150.0, + "grad_norm": 0.2557082772254944, + "kl": 0.21229877571264902, + "learning_rate": 1.587785252292473e-06, + "loss": 0.0016, + "step": 150, + "tallyqa/reward": 3.3187499046325684, + "tallyqa/reward_std": 0.03608439117670059, + "tallyqa/rewards/answer_format_reward": 0.5, + "tallyqa/rewards/bleu_score_reward": 0.10000000149011612, + "tallyqa/rewards/gpt_score_reward": 1.0, + "tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.0, + "tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 1.0, + "tallyqa/rewards/repetitive_reward": 0.5, + "tallyqa/rewards/think_and_rethink_format_reward": 0.5, + "vsr/reward": 3.2562499046325684, + "vsr/reward_std": 0.04999999701976776, + "vsr/rewards/answer_format_reward": 0.5, + "vsr/rewards/bleu_score_reward": 0.10000000149011612, + "vsr/rewards/gpt_score_reward": 1.0, + "vsr/rewards/grounded_region_bbox_IOU_loss": 0.43858566880226135, + "vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "vsr/rewards/repetitive_reward": 0.5, + "vsr/rewards/think_and_rethink_format_reward": 0.5 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/vsr_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/vsr_val.jsonl_runtime": 889.1711, + "eval_../mm-cot-data/vsr_val.jsonl_samples_per_second": 0.324, + "eval_../mm-cot-data/vsr_val.jsonl_steps_per_second": 0.002, + "eval_vsr/reward": 2.8251915772755942, + "eval_vsr/reward_std": 0.37300830086072284, + "eval_vsr/rewards/answer_format_reward": 0.5, + "eval_vsr/rewards/bleu_score_reward": 0.07916666567325592, + "eval_vsr/rewards/gpt_score_reward": 0.8020833333333334, + "eval_vsr/rewards/grounded_region_bbox_IOU_loss": 0.34175268809000653, + "eval_vsr/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4895833333333333, + "eval_vsr/rewards/repetitive_reward": 0.5, + "eval_vsr/rewards/think_and_rethink_format_reward": 0.5, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/mme_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mme_val.jsonl_runtime": 765.6717, + "eval_../mm-cot-data/mme_val.jsonl_samples_per_second": 0.313, + "eval_../mm-cot-data/mme_val.jsonl_steps_per_second": 0.001, + "eval_mme_count/reward": 2.91796875, + "eval_mme_count/reward_std": 0.2776575982570648, + "eval_mme_count/rewards/answer_format_reward": 0.46875, + "eval_mme_count/rewards/bleu_score_reward": 0.08749999850988388, + "eval_mme_count/rewards/gpt_score_reward": 0.859375, + "eval_mme_count/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mme_count/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.359375, + "eval_mme_count/rewards/repetitive_reward": 0.5, + "eval_mme_count/rewards/think_and_rethink_format_reward": 0.4749999940395355, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/tallyqa_val.jsonl_runtime": 1521.7102, + "eval_../mm-cot-data/tallyqa_val.jsonl_samples_per_second": 0.323, + "eval_../mm-cot-data/tallyqa_val.jsonl_steps_per_second": 0.001, + "eval_tallyqa/reward": 2.725390613079071, + "eval_tallyqa/reward_std": 0.796591266989708, + "eval_tallyqa/rewards/answer_format_reward": 0.4921875, + "eval_tallyqa/rewards/bleu_score_reward": 0.03906250139698386, + "eval_tallyqa/rewards/gpt_score_reward": 0.390625, + "eval_tallyqa/rewards/grounded_region_bbox_IOU_loss": 0.181011650711298, + "eval_tallyqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.6796875, + "eval_tallyqa/rewards/repetitive_reward": 0.5, + "eval_tallyqa/rewards/think_and_rethink_format_reward": 0.4937499985098839, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/gqa_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/gqa_val.jsonl_runtime": 1304.174, + "eval_../mm-cot-data/gqa_val.jsonl_samples_per_second": 0.39, + "eval_../mm-cot-data/gqa_val.jsonl_steps_per_second": 0.002, + "eval_gqa/reward": 2.740229904651642, + "eval_gqa/reward_std": 0.4232433810830116, + "eval_gqa/rewards/answer_format_reward": 0.5, + "eval_gqa/rewards/bleu_score_reward": 0.055468750186264515, + "eval_gqa/rewards/gpt_score_reward": 0.6937499940395355, + "eval_gqa/rewards/grounded_region_bbox_IOU_loss": 0.36101559922099113, + "eval_gqa/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4765625, + "eval_gqa/rewards/repetitive_reward": 0.5, + "eval_gqa/rewards/think_and_rethink_format_reward": 0.5, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_runtime": 3114.1439, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_samples_per_second": 0.321, + "eval_../mm-cot-data/mathvista_mini_val.jsonl_steps_per_second": 0.001, + "eval_mathvista_mini/reward": 2.188286393880844, + "eval_mathvista_mini/reward_std": 0.6030311584472656, + "eval_mathvista_mini/rewards/answer_format_reward": 0.4765625, + "eval_mathvista_mini/rewards/bleu_score_reward": 0.026356061920523643, + "eval_mathvista_mini/rewards/gpt_score_reward": 0.5484375022351742, + "eval_mathvista_mini/rewards/grounded_region_bbox_IOU_loss": 2.0, + "eval_mathvista_mini/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.17578125, + "eval_mathvista_mini/rewards/repetitive_reward": 0.49699968844652176, + "eval_mathvista_mini/rewards/think_and_rethink_format_reward": 0.47187499329447746, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/pope_coco_val.jsonl_runtime": 1543.12, + "eval_../mm-cot-data/pope_coco_val.jsonl_samples_per_second": 0.324, + "eval_../mm-cot-data/pope_coco_val.jsonl_steps_per_second": 0.001, + "eval_pope_coco/reward": 2.6835935711860657, + "eval_pope_coco/reward_std": 0.4386630058288574, + "eval_pope_coco/rewards/answer_format_reward": 0.4921875, + "eval_pope_coco/rewards/bleu_score_reward": 0.08906250074505806, + "eval_pope_coco/rewards/gpt_score_reward": 0.8796875029802322, + "eval_pope_coco/rewards/grounded_region_bbox_IOU_loss": -0.9634968787431717, + "eval_pope_coco/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.21875, + "eval_pope_coco/rewards/repetitive_reward": 0.5, + "eval_pope_coco/rewards/think_and_rethink_format_reward": 0.4937499985098839, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_position_val.jsonl_runtime": 4185.6461, + "eval_../mm-cot-data/ovd_position_val.jsonl_samples_per_second": 0.513, + "eval_../mm-cot-data/ovd_position_val.jsonl_steps_per_second": 0.002, + "eval_ovd_position/reward": 2.000944936976713, + "eval_ovd_position/reward_std": 0.020203162806437296, + "eval_ovd_position/rewards/answer_format_reward": 0.5, + "eval_ovd_position/rewards/bleu_score_reward": 0.007077206049443167, + "eval_ovd_position/rewards/gpt_score_reward": 0.0, + "eval_ovd_position/rewards/grounded_region_bbox_IOU_loss": 0.3596622268943226, + "eval_ovd_position/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.4944852941176471, + "eval_ovd_position/rewards/repetitive_reward": 0.5, + "eval_ovd_position/rewards/think_and_rethink_format_reward": 0.5, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_runtime": 6819.1299, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_samples_per_second": 0.456, + "eval_../mm-cot-data/ovd_relationship_val.jsonl_steps_per_second": 0.002, + "eval_ovd_relationship/reward": 1.9980156135559082, + "eval_ovd_relationship/reward_std": 0.010514787836000324, + "eval_ovd_relationship/rewards/answer_format_reward": 0.5, + "eval_ovd_relationship/rewards/bleu_score_reward": 0.0018750000325962902, + "eval_ovd_relationship/rewards/gpt_score_reward": 0.0, + "eval_ovd_relationship/rewards/grounded_region_bbox_IOU_loss": 0.28938207983970643, + "eval_ovd_relationship/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.49875, + "eval_ovd_relationship/rewards/repetitive_reward": 0.5, + "eval_ovd_relationship/rewards/think_and_rethink_format_reward": 0.5, + "step": 150 + }, + { + "epoch": 150.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_loss": 0.0, + "eval_../mm-cot-data/ovd_negation_val.jsonl_runtime": 1750.982, + "eval_../mm-cot-data/ovd_negation_val.jsonl_samples_per_second": 0.38, + "eval_../mm-cot-data/ovd_negation_val.jsonl_steps_per_second": 0.002, + "eval_ovd_negation/reward": 1.998771031697591, + "eval_ovd_negation/reward_std": 0.018573568978657324, + "eval_ovd_negation/rewards/answer_format_reward": 0.5, + "eval_ovd_negation/rewards/bleu_score_reward": 0.004166666748157392, + "eval_ovd_negation/rewards/gpt_score_reward": 0.0, + "eval_ovd_negation/rewards/grounded_region_bbox_IOU_loss": 0.37069370845953625, + "eval_ovd_negation/rewards/grounded_region_specific_thinking_format_reward_think_rethink": 0.5, + "eval_ovd_negation/rewards/repetitive_reward": 0.5, + "eval_ovd_negation/rewards/think_and_rethink_format_reward": 0.5, + "step": 150 + } + ], + "logging_steps": 1.0, + "max_steps": 500, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}