diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36043 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1704049247023239, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1623.38671875, + "completions/mean_terminated_length": 1094.482421875, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.32960883900523186, + "epoch": 0.00017040492470232388, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11354459077119827, + "learning_rate": 1e-06, + "loss": 0.0487, + "num_tokens": 494835.0, + "reward": 0.21875, + "reward_std": 0.19970625638961792, + "rewards/simpleverify_reward/mean": 0.21875, + "rewards/simpleverify_reward/std": 0.41420844197273254, + "step": 1, + "tools/generated_tokens": 6151.38671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.68359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1757.83203125, + "completions/mean_terminated_length": 1130.9259033203125, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.3407264407724142, + "epoch": 0.00034080984940464777, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12227898836135864, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 1032424.0, + "reward": 0.21875, + "reward_std": 0.17978152632713318, + "rewards/simpleverify_reward/mean": 0.21875, + "rewards/simpleverify_reward/std": 0.41420844197273254, + "step": 2, + "tools/generated_tokens": 6749.8359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.7578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2013.0, + "completions/mean_length": 1809.6796875, + "completions/mean_terminated_length": 1064.0, + "completions/min_length": 28.0, + "completions/min_terminated_length": 28.0, + "entropy": 0.3413053434342146, + "epoch": 0.0005112147741069717, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.11295190453529358, + "learning_rate": 1e-06, + "loss": 0.0202, + "num_tokens": 1591990.0, + "reward": 0.14453125, + "reward_std": 0.1115587055683136, + "rewards/simpleverify_reward/mean": 0.14453125, + "rewards/simpleverify_reward/std": 0.35231640934944153, + "step": 3, + "tools/generated_tokens": 7393.6875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.7265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.56640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1745.20703125, + "completions/mean_terminated_length": 1349.7117919921875, + "completions/min_length": 68.0, + "completions/min_terminated_length": 68.0, + "entropy": 0.2993578836321831, + "epoch": 0.0006816196988092955, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11197676509618759, + "learning_rate": 1e-06, + "loss": 0.0245, + "num_tokens": 2119995.0, + "reward": 0.3125, + "reward_std": 0.17499089241027832, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 4, + "tools/generated_tokens": 6441.25, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1406.22265625, + "completions/mean_terminated_length": 1014.704345703125, + "completions/min_length": 79.0, + "completions/min_terminated_length": 79.0, + "entropy": 0.3491258881986141, + "epoch": 0.0008520246235116195, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.14115206897258759, + "learning_rate": 1e-06, + "loss": 0.0391, + "num_tokens": 2563172.0, + "reward": 0.33984375, + "reward_std": 0.2401386797428131, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 5, + "tools/generated_tokens": 5398.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.94921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.56640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1625.42578125, + "completions/mean_terminated_length": 1073.4324951171875, + "completions/min_length": 124.0, + "completions/min_terminated_length": 124.0, + "entropy": 0.45098429545760155, + "epoch": 0.0010224295482139435, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15496528148651123, + "learning_rate": 1e-06, + "loss": 0.0365, + "num_tokens": 3068193.0, + "reward": 0.2734375, + "reward_std": 0.2371043860912323, + "rewards/simpleverify_reward/mean": 0.2734375, + "rewards/simpleverify_reward/std": 0.446596622467041, + "step": 6, + "tools/generated_tokens": 6705.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1647.97265625, + "completions/mean_terminated_length": 1215.4227294921875, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.3355453088879585, + "epoch": 0.0011928344729162674, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15678000450134277, + "learning_rate": 1e-06, + "loss": 0.0481, + "num_tokens": 3577610.0, + "reward": 0.36328125, + "reward_std": 0.25734785199165344, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 7, + "tools/generated_tokens": 6591.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.53515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1603.62109375, + "completions/mean_terminated_length": 1092.0252685546875, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.35666714422404766, + "epoch": 0.001363239397618591, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12019651383161545, + "learning_rate": 1e-06, + "loss": 0.002, + "num_tokens": 4080745.0, + "reward": 0.17578125, + "reward_std": 0.17626741528511047, + "rewards/simpleverify_reward/mean": 0.17578125, + "rewards/simpleverify_reward/std": 0.3813795745372772, + "step": 8, + "tools/generated_tokens": 6115.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.57421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1702.17578125, + "completions/mean_terminated_length": 1235.8072509765625, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.36048993095755577, + "epoch": 0.001533644322320915, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11520885676145554, + "learning_rate": 1e-06, + "loss": 0.021, + "num_tokens": 4598038.0, + "reward": 0.15234375, + "reward_std": 0.17712010443210602, + "rewards/simpleverify_reward/mean": 0.15234375, + "rewards/simpleverify_reward/std": 0.3600577116012573, + "step": 9, + "tools/generated_tokens": 6598.18359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.62109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1667.6640625, + "completions/mean_terminated_length": 1044.2474365234375, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.35148809291422367, + "epoch": 0.001704049247023239, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13783320784568787, + "learning_rate": 1e-06, + "loss": 0.0335, + "num_tokens": 5109184.0, + "reward": 0.29296875, + "reward_std": 0.2421935796737671, + "rewards/simpleverify_reward/mean": 0.29296875, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 10, + "tools/generated_tokens": 6475.671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.34765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1735.77734375, + "completions/mean_terminated_length": 1264.411865234375, + "completions/min_length": 106.0, + "completions/min_terminated_length": 106.0, + "entropy": 0.3556172177195549, + "epoch": 0.0018744541717255628, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.09855224937200546, + "learning_rate": 1e-06, + "loss": 0.0304, + "num_tokens": 5643399.0, + "reward": 0.203125, + "reward_std": 0.10519562661647797, + "rewards/simpleverify_reward/mean": 0.203125, + "rewards/simpleverify_reward/std": 0.40311288833618164, + "step": 11, + "tools/generated_tokens": 6807.8046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1592.29296875, + "completions/mean_terminated_length": 1232.19580078125, + "completions/min_length": 21.0, + "completions/min_terminated_length": 21.0, + "entropy": 0.32066681049764156, + "epoch": 0.002044859096427887, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13096962869167328, + "learning_rate": 1e-06, + "loss": 0.0284, + "num_tokens": 6133570.0, + "reward": 0.3125, + "reward_std": 0.17385752499103546, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 12, + "tools/generated_tokens": 5880.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.09375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1815.23828125, + "completions/mean_terminated_length": 1054.8834228515625, + "completions/min_length": 45.0, + "completions/min_terminated_length": 45.0, + "entropy": 0.3145635910332203, + "epoch": 0.0022152640211302106, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.11477241665124893, + "learning_rate": 1e-06, + "loss": 0.0174, + "num_tokens": 6688703.0, + "reward": 0.15625, + "reward_std": 0.1354367733001709, + "rewards/simpleverify_reward/mean": 0.15625, + "rewards/simpleverify_reward/std": 0.3638034462928772, + "step": 13, + "tools/generated_tokens": 7303.23828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1666.7109375, + "completions/mean_terminated_length": 1160.654541015625, + "completions/min_length": 87.0, + "completions/min_terminated_length": 87.0, + "entropy": 0.3039297014474869, + "epoch": 0.0023856689458325348, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13038323819637299, + "learning_rate": 1e-06, + "loss": 0.0159, + "num_tokens": 7212133.0, + "reward": 0.22265625, + "reward_std": 0.18738040328025818, + "rewards/simpleverify_reward/mean": 0.22265625, + "rewards/simpleverify_reward/std": 0.41684433817863464, + "step": 14, + "tools/generated_tokens": 6506.72265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1575.6953125, + "completions/mean_terminated_length": 1171.847900390625, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.40485683642327785, + "epoch": 0.0025560738705348585, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15688402950763702, + "learning_rate": 1e-06, + "loss": 0.0193, + "num_tokens": 7701367.0, + "reward": 0.30859375, + "reward_std": 0.2559266686439514, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 15, + "tools/generated_tokens": 6167.703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.2421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.53515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1618.70703125, + "completions/mean_terminated_length": 1124.4874267578125, + "completions/min_length": 46.0, + "completions/min_terminated_length": 46.0, + "entropy": 0.35048897564411163, + "epoch": 0.002726478795237182, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.17330823838710785, + "learning_rate": 1e-06, + "loss": 0.0364, + "num_tokens": 8201052.0, + "reward": 0.3671875, + "reward_std": 0.15261822938919067, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 16, + "tools/generated_tokens": 6266.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.26953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1644.9765625, + "completions/mean_terminated_length": 1215.9595947265625, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.36185348220169544, + "epoch": 0.0028968837199395063, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11988542228937149, + "learning_rate": 1e-06, + "loss": 0.0412, + "num_tokens": 8714230.0, + "reward": 0.3203125, + "reward_std": 0.1737399697303772, + "rewards/simpleverify_reward/mean": 0.3203125, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 17, + "tools/generated_tokens": 6420.98828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.33203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.47265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1607.0625, + "completions/mean_terminated_length": 1211.86669921875, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.3239676281809807, + "epoch": 0.00306728864464183, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11701221764087677, + "learning_rate": 1e-06, + "loss": 0.0026, + "num_tokens": 9216486.0, + "reward": 0.390625, + "reward_std": 0.1892854869365692, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 18, + "tools/generated_tokens": 6311.07421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.48828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1551.7265625, + "completions/mean_terminated_length": 1078.198486328125, + "completions/min_length": 23.0, + "completions/min_terminated_length": 23.0, + "entropy": 0.3650179672986269, + "epoch": 0.003237693569344154, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12444363534450531, + "learning_rate": 1e-06, + "loss": 0.0311, + "num_tokens": 9699552.0, + "reward": 0.35546875, + "reward_std": 0.16483555734157562, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 19, + "tools/generated_tokens": 5847.74609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.09765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1714.6875, + "completions/mean_terminated_length": 1194.72998046875, + "completions/min_length": 60.0, + "completions/min_terminated_length": 60.0, + "entropy": 0.35892440751194954, + "epoch": 0.003408098494046478, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12233246117830276, + "learning_rate": 1e-06, + "loss": 0.0165, + "num_tokens": 10234128.0, + "reward": 0.1953125, + "reward_std": 0.15176509320735931, + "rewards/simpleverify_reward/mean": 0.1953125, + "rewards/simpleverify_reward/std": 0.39721766114234924, + "step": 20, + "tools/generated_tokens": 7010.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.5859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.70703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2017.0, + "completions/mean_length": 1769.3046875, + "completions/mean_terminated_length": 1096.719970703125, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.3672831766307354, + "epoch": 0.003578503418748802, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.09308706223964691, + "learning_rate": 1e-06, + "loss": 0.017, + "num_tokens": 10780926.0, + "reward": 0.14453125, + "reward_std": 0.138350710272789, + "rewards/simpleverify_reward/mean": 0.14453125, + "rewards/simpleverify_reward/std": 0.35231640934944153, + "step": 21, + "tools/generated_tokens": 7305.30859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1680.05078125, + "completions/mean_terminated_length": 1191.727294921875, + "completions/min_length": 101.0, + "completions/min_terminated_length": 101.0, + "entropy": 0.2959140334278345, + "epoch": 0.0037489083434511256, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.10992983728647232, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 11303227.0, + "reward": 0.28125, + "reward_std": 0.1999323070049286, + "rewards/simpleverify_reward/mean": 0.28125, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 22, + "tools/generated_tokens": 6648.078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.42578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.52734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1579.7734375, + "completions/mean_terminated_length": 1057.4296875, + "completions/min_length": 13.0, + "completions/min_terminated_length": 13.0, + "entropy": 0.3190094195306301, + "epoch": 0.00391931326815345, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1401917040348053, + "learning_rate": 1e-06, + "loss": 0.0251, + "num_tokens": 11800945.0, + "reward": 0.33984375, + "reward_std": 0.1824694126844406, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 23, + "tools/generated_tokens": 6131.80859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.22265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.58984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1689.01953125, + "completions/mean_terminated_length": 1172.79052734375, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.35156455263495445, + "epoch": 0.004089718192855774, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19344040751457214, + "learning_rate": 1e-06, + "loss": 0.031, + "num_tokens": 12317702.0, + "reward": 0.2890625, + "reward_std": 0.20389671623706818, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 24, + "tools/generated_tokens": 6561.03125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1676.98046875, + "completions/mean_terminated_length": 1116.813720703125, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.341730497777462, + "epoch": 0.004260123117558097, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11004383862018585, + "learning_rate": 1e-06, + "loss": 0.0323, + "num_tokens": 12837921.0, + "reward": 0.1875, + "reward_std": 0.18843428790569305, + "rewards/simpleverify_reward/mean": 0.1875, + "rewards/simpleverify_reward/std": 0.3910769522190094, + "step": 25, + "tools/generated_tokens": 6604.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.40625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.46484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1544.796875, + "completions/mean_terminated_length": 1107.72265625, + "completions/min_length": 260.0, + "completions/min_terminated_length": 260.0, + "entropy": 0.31659174151718616, + "epoch": 0.004430528042260421, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.10992801189422607, + "learning_rate": 1e-06, + "loss": 0.021, + "num_tokens": 13324413.0, + "reward": 0.2890625, + "reward_std": 0.1932636797428131, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 26, + "tools/generated_tokens": 6064.8046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.38671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1475.1015625, + "completions/mean_terminated_length": 1113.85986328125, + "completions/min_length": 36.0, + "completions/min_terminated_length": 36.0, + "entropy": 0.31422682851552963, + "epoch": 0.004600932966962745, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1931154876947403, + "learning_rate": 1e-06, + "loss": 0.0217, + "num_tokens": 13792375.0, + "reward": 0.46875, + "reward_std": 0.2512108087539673, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 27, + "tools/generated_tokens": 5867.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.14453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2004.0, + "completions/mean_length": 1601.94921875, + "completions/mean_terminated_length": 1134.488037109375, + "completions/min_length": 20.0, + "completions/min_terminated_length": 20.0, + "entropy": 0.32225533202290535, + "epoch": 0.0047713378916650695, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.12505559623241425, + "learning_rate": 1e-06, + "loss": 0.0454, + "num_tokens": 14291850.0, + "reward": 0.4140625, + "reward_std": 0.21039125323295593, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 28, + "tools/generated_tokens": 6393.95703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.33984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.55078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1672.54296875, + "completions/mean_terminated_length": 1212.2086181640625, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "entropy": 0.32055498845875263, + "epoch": 0.004941742816367393, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16482892632484436, + "learning_rate": 1e-06, + "loss": 0.0237, + "num_tokens": 14802213.0, + "reward": 0.375, + "reward_std": 0.24836406111717224, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 29, + "tools/generated_tokens": 6160.546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1507.3359375, + "completions/mean_terminated_length": 1059.3642578125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "entropy": 0.36279159784317017, + "epoch": 0.005112147741069717, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12792861461639404, + "learning_rate": 1e-06, + "loss": 0.0252, + "num_tokens": 15271867.0, + "reward": 0.34765625, + "reward_std": 0.21448004245758057, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 30, + "tools/generated_tokens": 5795.3359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.09375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.43359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1525.83984375, + "completions/mean_terminated_length": 1126.1171875, + "completions/min_length": 31.0, + "completions/min_terminated_length": 31.0, + "entropy": 0.40453204698860645, + "epoch": 0.005282552665772041, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14503608644008636, + "learning_rate": 1e-06, + "loss": 0.0195, + "num_tokens": 15751746.0, + "reward": 0.3046875, + "reward_std": 0.2298790067434311, + "rewards/simpleverify_reward/mean": 0.3046875, + "rewards/simpleverify_reward/std": 0.4611765742301941, + "step": 31, + "tools/generated_tokens": 6157.84375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.26171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.46875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1549.65234375, + "completions/mean_terminated_length": 1109.9559326171875, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.3383567910641432, + "epoch": 0.005452957590474364, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1525263786315918, + "learning_rate": 1e-06, + "loss": 0.0392, + "num_tokens": 16243353.0, + "reward": 0.38671875, + "reward_std": 0.2071847766637802, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 32, + "tools/generated_tokens": 5877.671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.11328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2004.0, + "completions/mean_length": 1602.7265625, + "completions/mean_terminated_length": 1136.112060546875, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.36383174173533916, + "epoch": 0.0056233625151766884, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.15686126053333282, + "learning_rate": 1e-06, + "loss": 0.024, + "num_tokens": 16742019.0, + "reward": 0.3359375, + "reward_std": 0.2700601816177368, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 33, + "tools/generated_tokens": 6618.74609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.44921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1344.25390625, + "completions/mean_terminated_length": 988.2470703125, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.36758890748023987, + "epoch": 0.005793767439879013, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16702856123447418, + "learning_rate": 1e-06, + "loss": 0.0066, + "num_tokens": 17168116.0, + "reward": 0.28125, + "reward_std": 0.22981059551239014, + "rewards/simpleverify_reward/mean": 0.28125, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 34, + "tools/generated_tokens": 5288.26171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.92578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1457.4375, + "completions/mean_terminated_length": 1137.2650146484375, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.3349355608224869, + "epoch": 0.005964172364581337, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11891477555036545, + "learning_rate": 1e-06, + "loss": 0.03, + "num_tokens": 17627588.0, + "reward": 0.3359375, + "reward_std": 0.17693254351615906, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 35, + "tools/generated_tokens": 5873.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.15625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1471.2421875, + "completions/mean_terminated_length": 1022.6597290039062, + "completions/min_length": 15.0, + "completions/min_terminated_length": 15.0, + "entropy": 0.3687310889363289, + "epoch": 0.00613457728928366, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15980218350887299, + "learning_rate": 1e-06, + "loss": 0.0302, + "num_tokens": 18087282.0, + "reward": 0.3671875, + "reward_std": 0.3007515072822571, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 36, + "tools/generated_tokens": 5999.25390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1500.85546875, + "completions/mean_terminated_length": 1054.616943359375, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.3143069688230753, + "epoch": 0.006304982213985984, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12282077223062515, + "learning_rate": 1e-06, + "loss": 0.0125, + "num_tokens": 18553053.0, + "reward": 0.2734375, + "reward_std": 0.17862266302108765, + "rewards/simpleverify_reward/mean": 0.2734375, + "rewards/simpleverify_reward/std": 0.446596622467041, + "step": 37, + "tools/generated_tokens": 5604.875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.00390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1652.72265625, + "completions/mean_terminated_length": 1225.333251953125, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "entropy": 0.3574356138706207, + "epoch": 0.006475387138688308, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.08517470210790634, + "learning_rate": 1e-06, + "loss": 0.0257, + "num_tokens": 19063382.0, + "reward": 0.203125, + "reward_std": 0.14704003930091858, + "rewards/simpleverify_reward/mean": 0.203125, + "rewards/simpleverify_reward/std": 0.40311288833618164, + "step": 38, + "tools/generated_tokens": 6236.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.23828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2016.0, + "completions/mean_length": 1862.078125, + "completions/mean_terminated_length": 1254.7333984375, + "completions/min_length": 106.0, + "completions/min_terminated_length": 106.0, + "entropy": 0.32641259767115116, + "epoch": 0.006645792063390632, + "frac_reward_zero_std": 0.8125, + "grad_norm": 0.05535029247403145, + "learning_rate": 1e-06, + "loss": 0.0114, + "num_tokens": 19634682.0, + "reward": 0.0703125, + "reward_std": 0.08539125323295593, + "rewards/simpleverify_reward/mean": 0.0703125, + "rewards/simpleverify_reward/std": 0.2561737895011902, + "step": 39, + "tools/generated_tokens": 7454.078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1432.1953125, + "completions/mean_terminated_length": 1050.253173828125, + "completions/min_length": 17.0, + "completions/min_terminated_length": 17.0, + "entropy": 0.38021427020430565, + "epoch": 0.006816196988092956, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13984593749046326, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 20088108.0, + "reward": 0.53515625, + "reward_std": 0.18213960528373718, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 40, + "tools/generated_tokens": 5464.21484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.96875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1978.0, + "completions/mean_length": 1794.15625, + "completions/mean_terminated_length": 1092.36767578125, + "completions/min_length": 67.0, + "completions/min_terminated_length": 67.0, + "entropy": 0.3053403776139021, + "epoch": 0.00698660191279528, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.11249354481697083, + "learning_rate": 1e-06, + "loss": 0.0218, + "num_tokens": 20640228.0, + "reward": 0.2578125, + "reward_std": 0.1900683045387268, + "rewards/simpleverify_reward/mean": 0.2578125, + "rewards/simpleverify_reward/std": 0.4382871091365814, + "step": 41, + "tools/generated_tokens": 7122.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.53515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1605.3515625, + "completions/mean_terminated_length": 1095.75634765625, + "completions/min_length": 104.0, + "completions/min_terminated_length": 104.0, + "entropy": 0.3565551396459341, + "epoch": 0.007157006837497604, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14736947417259216, + "learning_rate": 1e-06, + "loss": 0.0609, + "num_tokens": 21142254.0, + "reward": 0.25390625, + "reward_std": 0.17968884110450745, + "rewards/simpleverify_reward/mean": 0.25390625, + "rewards/simpleverify_reward/std": 0.4360972046852112, + "step": 42, + "tools/generated_tokens": 6397.35546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.33984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1620.98046875, + "completions/mean_terminated_length": 1180.40478515625, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.31288580037653446, + "epoch": 0.007327411762199928, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1469164937734604, + "learning_rate": 1e-06, + "loss": 0.0412, + "num_tokens": 21642633.0, + "reward": 0.35546875, + "reward_std": 0.27475816011428833, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 43, + "tools/generated_tokens": 6460.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1577.40234375, + "completions/mean_terminated_length": 1187.4786376953125, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.37273502349853516, + "epoch": 0.007497816686902251, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14188292622566223, + "learning_rate": 1e-06, + "loss": 0.0473, + "num_tokens": 22148912.0, + "reward": 0.30078125, + "reward_std": 0.24556957185268402, + "rewards/simpleverify_reward/mean": 0.30078125, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 44, + "tools/generated_tokens": 6313.40625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1561.4453125, + "completions/mean_terminated_length": 1145.4130859375, + "completions/min_length": 107.0, + "completions/min_terminated_length": 107.0, + "entropy": 0.3211830984801054, + "epoch": 0.007668221611604575, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11664831638336182, + "learning_rate": 1e-06, + "loss": 0.0315, + "num_tokens": 22635714.0, + "reward": 0.3671875, + "reward_std": 0.2108054757118225, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 45, + "tools/generated_tokens": 6201.46875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1571.8046875, + "completions/mean_terminated_length": 1201.4722900390625, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.3283666502684355, + "epoch": 0.0078386265363069, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.139494389295578, + "learning_rate": 1e-06, + "loss": 0.0478, + "num_tokens": 23124672.0, + "reward": 0.41015625, + "reward_std": 0.20382197201251984, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 46, + "tools/generated_tokens": 5931.8515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.12890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.53515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1610.671875, + "completions/mean_terminated_length": 1107.2017822265625, + "completions/min_length": 140.0, + "completions/min_terminated_length": 140.0, + "entropy": 0.3429228141903877, + "epoch": 0.008009031461009224, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.1430044174194336, + "learning_rate": 1e-06, + "loss": 0.0081, + "num_tokens": 23628716.0, + "reward": 0.390625, + "reward_std": 0.10331955552101135, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 47, + "tools/generated_tokens": 6210.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.43359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1567.9765625, + "completions/mean_terminated_length": 1200.5103759765625, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.35710458643734455, + "epoch": 0.008179436385711548, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.12251273542642593, + "learning_rate": 1e-06, + "loss": 0.0403, + "num_tokens": 24115142.0, + "reward": 0.43359375, + "reward_std": 0.12082535773515701, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 48, + "tools/generated_tokens": 5895.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.11328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1555.0625, + "completions/mean_terminated_length": 1195.3514404296875, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.3559937682002783, + "epoch": 0.00834984131041387, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1743675172328949, + "learning_rate": 1e-06, + "loss": 0.0072, + "num_tokens": 24594118.0, + "reward": 0.34765625, + "reward_std": 0.17286168038845062, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 49, + "tools/generated_tokens": 5531.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.94140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1506.8359375, + "completions/mean_terminated_length": 1233.070556640625, + "completions/min_length": 84.0, + "completions/min_terminated_length": 84.0, + "entropy": 0.31781978718936443, + "epoch": 0.008520246235116194, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19409367442131042, + "learning_rate": 1e-06, + "loss": -0.0134, + "num_tokens": 25071788.0, + "reward": 0.4765625, + "reward_std": 0.26829975843429565, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 50, + "tools/generated_tokens": 5642.84765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.01953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1465.50390625, + "completions/mean_terminated_length": 1160.3988037109375, + "completions/min_length": 80.0, + "completions/min_terminated_length": 80.0, + "entropy": 0.3548562824726105, + "epoch": 0.008690651159818518, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1465141773223877, + "learning_rate": 1e-06, + "loss": 0.0526, + "num_tokens": 25534637.0, + "reward": 0.55859375, + "reward_std": 0.2823812961578369, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 51, + "tools/generated_tokens": 5801.51171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.1171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1404.16796875, + "completions/mean_terminated_length": 1042.993896484375, + "completions/min_length": 79.0, + "completions/min_terminated_length": 79.0, + "entropy": 0.32291222736239433, + "epoch": 0.008861056084520843, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1312384307384491, + "learning_rate": 1e-06, + "loss": 0.0331, + "num_tokens": 25980520.0, + "reward": 0.37890625, + "reward_std": 0.23091968894004822, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 52, + "tools/generated_tokens": 5452.1796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1470.67578125, + "completions/mean_terminated_length": 1118.4716796875, + "completions/min_length": 17.0, + "completions/min_terminated_length": 17.0, + "entropy": 0.3883262947201729, + "epoch": 0.009031461009223167, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1563533991575241, + "learning_rate": 1e-06, + "loss": -0.0133, + "num_tokens": 26438693.0, + "reward": 0.42578125, + "reward_std": 0.18032719194889069, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 53, + "tools/generated_tokens": 5510.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.97265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1993.0, + "completions/mean_length": 1653.49609375, + "completions/mean_terminated_length": 1095.2452392578125, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.3353212848305702, + "epoch": 0.00920186593392549, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.09876301139593124, + "learning_rate": 1e-06, + "loss": 0.0151, + "num_tokens": 26953028.0, + "reward": 0.33984375, + "reward_std": 0.12082062661647797, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 54, + "tools/generated_tokens": 6589.5, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.41015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1426.5625, + "completions/mean_terminated_length": 1041.1138916015625, + "completions/min_length": 31.0, + "completions/min_terminated_length": 31.0, + "entropy": 0.3311825506389141, + "epoch": 0.009372270858627815, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14525867998600006, + "learning_rate": 1e-06, + "loss": 0.0471, + "num_tokens": 27426644.0, + "reward": 0.40625, + "reward_std": 0.18923160433769226, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 55, + "tools/generated_tokens": 5778.56640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1590.58203125, + "completions/mean_terminated_length": 1111.216064453125, + "completions/min_length": 149.0, + "completions/min_terminated_length": 149.0, + "entropy": 0.3789573274552822, + "epoch": 0.009542675783330139, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15582266449928284, + "learning_rate": 1e-06, + "loss": 0.013, + "num_tokens": 27919465.0, + "reward": 0.296875, + "reward_std": 0.250201940536499, + "rewards/simpleverify_reward/mean": 0.296875, + "rewards/simpleverify_reward/std": 0.45777595043182373, + "step": 56, + "tools/generated_tokens": 6278.58203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.2890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.42578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2005.0, + "completions/mean_length": 1474.25, + "completions/mean_terminated_length": 1048.8231201171875, + "completions/min_length": 36.0, + "completions/min_terminated_length": 36.0, + "entropy": 0.3115955535322428, + "epoch": 0.009713080708032461, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.11107443273067474, + "learning_rate": 1e-06, + "loss": 0.0304, + "num_tokens": 28390505.0, + "reward": 0.4453125, + "reward_std": 0.1519911289215088, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 57, + "tools/generated_tokens": 5698.2578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2006.0, + "completions/mean_length": 1476.76953125, + "completions/mean_terminated_length": 1085.9276123046875, + "completions/min_length": 9.0, + "completions/min_terminated_length": 9.0, + "entropy": 0.3250275757163763, + "epoch": 0.009883485632734786, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.12432430684566498, + "learning_rate": 1e-06, + "loss": -0.0352, + "num_tokens": 28856958.0, + "reward": 0.44921875, + "reward_std": 0.19398343563079834, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 58, + "tools/generated_tokens": 5876.765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.1484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2021.0, + "completions/mean_length": 1483.8125, + "completions/mean_terminated_length": 1091.4967041015625, + "completions/min_length": 101.0, + "completions/min_terminated_length": 101.0, + "entropy": 0.32753048464655876, + "epoch": 0.01005389055743711, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15509773790836334, + "learning_rate": 1e-06, + "loss": 0.0129, + "num_tokens": 29323134.0, + "reward": 0.25390625, + "reward_std": 0.22962586581707, + "rewards/simpleverify_reward/mean": 0.25390625, + "rewards/simpleverify_reward/std": 0.4360972046852112, + "step": 59, + "tools/generated_tokens": 5899.81640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.15625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.48828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1534.98828125, + "completions/mean_terminated_length": 1045.488525390625, + "completions/min_length": 104.0, + "completions/min_terminated_length": 104.0, + "entropy": 0.3313278928399086, + "epoch": 0.010224295482139434, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15667590498924255, + "learning_rate": 1e-06, + "loss": -0.0044, + "num_tokens": 29806059.0, + "reward": 0.30859375, + "reward_std": 0.17838552594184875, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 60, + "tools/generated_tokens": 5863.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.11328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.61328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1671.04296875, + "completions/mean_terminated_length": 1073.3837890625, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.3573396895080805, + "epoch": 0.010394700406841758, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.8728443384170532, + "learning_rate": 1e-06, + "loss": 0.0411, + "num_tokens": 30323014.0, + "reward": 0.26953125, + "reward_std": 0.20187556743621826, + "rewards/simpleverify_reward/mean": 0.26953125, + "rewards/simpleverify_reward/std": 0.44458550214767456, + "step": 61, + "tools/generated_tokens": 6735.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.54296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1607.87890625, + "completions/mean_terminated_length": 1085.01708984375, + "completions/min_length": 14.0, + "completions/min_terminated_length": 14.0, + "entropy": 0.346057066693902, + "epoch": 0.010565105331544082, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1445242166519165, + "learning_rate": 1e-06, + "loss": 0.0588, + "num_tokens": 30833687.0, + "reward": 0.38671875, + "reward_std": 0.2286548763513565, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 62, + "tools/generated_tokens": 6671.90234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2011.0, + "completions/mean_length": 1459.421875, + "completions/mean_terminated_length": 1094.3544921875, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "entropy": 0.3540453128516674, + "epoch": 0.010735510256246406, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11674729734659195, + "learning_rate": 1e-06, + "loss": -0.0057, + "num_tokens": 31292387.0, + "reward": 0.4921875, + "reward_std": 0.15074022114276886, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 63, + "tools/generated_tokens": 5411.421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.50390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1577.82421875, + "completions/mean_terminated_length": 1100.251953125, + "completions/min_length": 10.0, + "completions/min_terminated_length": 10.0, + "entropy": 0.3563331104815006, + "epoch": 0.010905915180948729, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13753943145275116, + "learning_rate": 1e-06, + "loss": 0.0228, + "num_tokens": 31787462.0, + "reward": 0.29296875, + "reward_std": 0.25158432126045227, + "rewards/simpleverify_reward/mean": 0.29296875, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 64, + "tools/generated_tokens": 6313.83203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1578.86328125, + "completions/mean_terminated_length": 1151.753662109375, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "entropy": 0.3383399248123169, + "epoch": 0.011076320105651053, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1068626344203949, + "learning_rate": 1e-06, + "loss": 0.0088, + "num_tokens": 32280979.0, + "reward": 0.39453125, + "reward_std": 0.13039018213748932, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 65, + "tools/generated_tokens": 5890.87890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.10546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1475.69921875, + "completions/mean_terminated_length": 1108.8590087890625, + "completions/min_length": 21.0, + "completions/min_terminated_length": 21.0, + "entropy": 0.3287957701832056, + "epoch": 0.011246725030353377, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1338070183992386, + "learning_rate": 1e-06, + "loss": 0.0038, + "num_tokens": 32748454.0, + "reward": 0.2890625, + "reward_std": 0.2060832381248474, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 66, + "tools/generated_tokens": 5475.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1533.21875, + "completions/mean_terminated_length": 1157.567626953125, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.3912510294467211, + "epoch": 0.011417129955055701, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.18103700876235962, + "learning_rate": 1e-06, + "loss": 0.0275, + "num_tokens": 33230286.0, + "reward": 0.33984375, + "reward_std": 0.1544148027896881, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 67, + "tools/generated_tokens": 5981.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1698.58984375, + "completions/mean_terminated_length": 1263.359619140625, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.3388095647096634, + "epoch": 0.011587534879758025, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1136864721775055, + "learning_rate": 1e-06, + "loss": 0.037, + "num_tokens": 33749509.0, + "reward": 0.265625, + "reward_std": 0.1779082715511322, + "rewards/simpleverify_reward/mean": 0.265625, + "rewards/simpleverify_reward/std": 0.4425306022167206, + "step": 68, + "tools/generated_tokens": 6474.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.33203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.66015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1711.87890625, + "completions/mean_terminated_length": 1058.9654541015625, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.3804211299866438, + "epoch": 0.01175793980446035, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.10970202088356018, + "learning_rate": 1e-06, + "loss": 0.0261, + "num_tokens": 34278182.0, + "reward": 0.15625, + "reward_std": 0.1355944126844406, + "rewards/simpleverify_reward/mean": 0.15625, + "rewards/simpleverify_reward/std": 0.3638034462928772, + "step": 69, + "tools/generated_tokens": 7031.890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.47265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1503.2578125, + "completions/mean_terminated_length": 1015.0147705078125, + "completions/min_length": 112.0, + "completions/min_terminated_length": 112.0, + "entropy": 0.3382138181477785, + "epoch": 0.011928344729162673, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15408317744731903, + "learning_rate": 1e-06, + "loss": 0.0628, + "num_tokens": 34751448.0, + "reward": 0.23828125, + "reward_std": 0.2356673926115036, + "rewards/simpleverify_reward/mean": 0.23828125, + "rewards/simpleverify_reward/std": 0.4268665909767151, + "step": 70, + "tools/generated_tokens": 6231.265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.61328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1681.09375, + "completions/mean_terminated_length": 1099.2322998046875, + "completions/min_length": 99.0, + "completions/min_terminated_length": 99.0, + "entropy": 0.3789903335273266, + "epoch": 0.012098749653864998, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1188197210431099, + "learning_rate": 1e-06, + "loss": 0.0079, + "num_tokens": 35273824.0, + "reward": 0.28125, + "reward_std": 0.1504095196723938, + "rewards/simpleverify_reward/mean": 0.28125, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 71, + "tools/generated_tokens": 6985.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.58984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1689.72265625, + "completions/mean_terminated_length": 1182.745361328125, + "completions/min_length": 114.0, + "completions/min_terminated_length": 114.0, + "entropy": 0.3729398362338543, + "epoch": 0.01226915457856732, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15884780883789062, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 35798889.0, + "reward": 0.328125, + "reward_std": 0.20214100182056427, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 72, + "tools/generated_tokens": 6577.73828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.38671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.32421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1412.16796875, + "completions/mean_terminated_length": 1107.121337890625, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.34434461034834385, + "epoch": 0.012439559503269644, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1620829850435257, + "learning_rate": 1e-06, + "loss": 0.0019, + "num_tokens": 36248548.0, + "reward": 0.43359375, + "reward_std": 0.19619880616664886, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 73, + "tools/generated_tokens": 5764.1796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1368.33984375, + "completions/mean_terminated_length": 1076.0, + "completions/min_length": 13.0, + "completions/min_terminated_length": 13.0, + "entropy": 0.3357909843325615, + "epoch": 0.012609964427971968, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14101873338222504, + "learning_rate": 1e-06, + "loss": 0.0508, + "num_tokens": 36680971.0, + "reward": 0.5703125, + "reward_std": 0.2199607938528061, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 74, + "tools/generated_tokens": 5272.359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.90625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.61328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1736.640625, + "completions/mean_terminated_length": 1242.86865234375, + "completions/min_length": 140.0, + "completions/min_terminated_length": 140.0, + "entropy": 0.35474786534905434, + "epoch": 0.012780369352674292, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.10660536587238312, + "learning_rate": 1e-06, + "loss": 0.0109, + "num_tokens": 37218543.0, + "reward": 0.2890625, + "reward_std": 0.16691282391548157, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 75, + "tools/generated_tokens": 6608.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1656.92578125, + "completions/mean_terminated_length": 1199.5762939453125, + "completions/min_length": 67.0, + "completions/min_terminated_length": 67.0, + "entropy": 0.3315989449620247, + "epoch": 0.012950774277376616, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.1214306652545929, + "learning_rate": 1e-06, + "loss": 0.0243, + "num_tokens": 37729004.0, + "reward": 0.2578125, + "reward_std": 0.10065875202417374, + "rewards/simpleverify_reward/mean": 0.2578125, + "rewards/simpleverify_reward/std": 0.4382871091365814, + "step": 76, + "tools/generated_tokens": 6480.953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1479.21875, + "completions/mean_terminated_length": 1064.1689453125, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.3326445445418358, + "epoch": 0.01312117920207894, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1574394851922989, + "learning_rate": 1e-06, + "loss": 0.0415, + "num_tokens": 38193348.0, + "reward": 0.4296875, + "reward_std": 0.258681058883667, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 77, + "tools/generated_tokens": 5703.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.32421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1442.6796875, + "completions/mean_terminated_length": 1152.2716064453125, + "completions/min_length": 15.0, + "completions/min_terminated_length": 15.0, + "entropy": 0.342384722083807, + "epoch": 0.013291584126781265, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15261264145374298, + "learning_rate": 1e-06, + "loss": -0.0514, + "num_tokens": 38640738.0, + "reward": 0.5390625, + "reward_std": 0.23635752499103546, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 78, + "tools/generated_tokens": 5114.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.79296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1415.203125, + "completions/mean_terminated_length": 1029.1572265625, + "completions/min_length": 15.0, + "completions/min_terminated_length": 15.0, + "entropy": 0.3426816575229168, + "epoch": 0.013461989051483587, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.2051922231912613, + "learning_rate": 1e-06, + "loss": 0.0292, + "num_tokens": 39090486.0, + "reward": 0.47265625, + "reward_std": 0.30645641684532166, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 79, + "tools/generated_tokens": 5935.20703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1454.515625, + "completions/mean_terminated_length": 1110.1605224609375, + "completions/min_length": 9.0, + "completions/min_terminated_length": 9.0, + "entropy": 0.33010238222777843, + "epoch": 0.013632393976185911, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15772663056850433, + "learning_rate": 1e-06, + "loss": 0.0053, + "num_tokens": 39554842.0, + "reward": 0.47265625, + "reward_std": 0.2092868983745575, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 80, + "tools/generated_tokens": 5686.5234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.06640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1429.90234375, + "completions/mean_terminated_length": 1033.6859130859375, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.35302944108843803, + "epoch": 0.013802798900888235, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15112242102622986, + "learning_rate": 1e-06, + "loss": 0.0263, + "num_tokens": 40011553.0, + "reward": 0.41015625, + "reward_std": 0.28301119804382324, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 81, + "tools/generated_tokens": 5821.90234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.14453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1536.80859375, + "completions/mean_terminated_length": 1187.0460205078125, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.376364478841424, + "epoch": 0.01397320382559056, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17073360085487366, + "learning_rate": 1e-06, + "loss": 0.012, + "num_tokens": 40497424.0, + "reward": 0.32421875, + "reward_std": 0.27595800161361694, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 82, + "tools/generated_tokens": 6096.81640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.2265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1381.19140625, + "completions/mean_terminated_length": 987.73291015625, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.3292817212641239, + "epoch": 0.014143608750292884, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.5036759972572327, + "learning_rate": 1e-06, + "loss": 0.0452, + "num_tokens": 40934545.0, + "reward": 0.33203125, + "reward_std": 0.2770320177078247, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 83, + "tools/generated_tokens": 5365.19140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1399.3359375, + "completions/mean_terminated_length": 1174.015869140625, + "completions/min_length": 22.0, + "completions/min_terminated_length": 22.0, + "entropy": 0.33120965771377087, + "epoch": 0.014314013674995208, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16501937806606293, + "learning_rate": 1e-06, + "loss": 0.0097, + "num_tokens": 41375990.0, + "reward": 0.5, + "reward_std": 0.21643753349781036, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 84, + "tools/generated_tokens": 5175.3515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.84375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1628.33203125, + "completions/mean_terminated_length": 1071.318115234375, + "completions/min_length": 81.0, + "completions/min_terminated_length": 81.0, + "entropy": 0.3914438560605049, + "epoch": 0.014484418599697532, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11321353167295456, + "learning_rate": 1e-06, + "loss": 0.0211, + "num_tokens": 41885115.0, + "reward": 0.2734375, + "reward_std": 0.1892854869365692, + "rewards/simpleverify_reward/mean": 0.2734375, + "rewards/simpleverify_reward/std": 0.446596622467041, + "step": 85, + "tools/generated_tokens": 6524.33984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1390.75, + "completions/mean_terminated_length": 1046.482177734375, + "completions/min_length": 13.0, + "completions/min_terminated_length": 13.0, + "entropy": 0.35446988977491856, + "epoch": 0.014654823524399856, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1685134768486023, + "learning_rate": 1e-06, + "loss": 0.0177, + "num_tokens": 42319931.0, + "reward": 0.48046875, + "reward_std": 0.17473775148391724, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 86, + "tools/generated_tokens": 5270.76171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.89453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.53125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1650.60546875, + "completions/mean_terminated_length": 1200.2333984375, + "completions/min_length": 13.0, + "completions/min_terminated_length": 13.0, + "entropy": 0.31209629215300083, + "epoch": 0.014825228449102178, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16722668707370758, + "learning_rate": 1e-06, + "loss": 0.025, + "num_tokens": 42831142.0, + "reward": 0.33984375, + "reward_std": 0.2480090707540512, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 87, + "tools/generated_tokens": 6098.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1558.86328125, + "completions/mean_terminated_length": 1172.3636474609375, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.32257607765495777, + "epoch": 0.014995633373804503, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.262336403131485, + "learning_rate": 1e-06, + "loss": 0.024, + "num_tokens": 43320419.0, + "reward": 0.27734375, + "reward_std": 0.1941438913345337, + "rewards/simpleverify_reward/mean": 0.27734375, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 88, + "tools/generated_tokens": 5806.87890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.07421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1568.30859375, + "completions/mean_terminated_length": 1240.0986328125, + "completions/min_length": 41.0, + "completions/min_terminated_length": 41.0, + "entropy": 0.31894766725599766, + "epoch": 0.015166038298506827, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1430114060640335, + "learning_rate": 1e-06, + "loss": 0.0263, + "num_tokens": 43804674.0, + "reward": 0.33984375, + "reward_std": 0.2531842887401581, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 89, + "tools/generated_tokens": 6024.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.17578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1492.8359375, + "completions/mean_terminated_length": 1106.8145751953125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "entropy": 0.35931413620710373, + "epoch": 0.01533644322320915, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.18770164251327515, + "learning_rate": 1e-06, + "loss": -0.0091, + "num_tokens": 44273656.0, + "reward": 0.42578125, + "reward_std": 0.17781084775924683, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 90, + "tools/generated_tokens": 5804.85546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.10546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1410.2578125, + "completions/mean_terminated_length": 1135.9384765625, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.3204840440303087, + "epoch": 0.015506848147911475, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16799981892108917, + "learning_rate": 1e-06, + "loss": 0.0144, + "num_tokens": 44713354.0, + "reward": 0.41796875, + "reward_std": 0.25784093141555786, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 91, + "tools/generated_tokens": 5010.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1517.09765625, + "completions/mean_terminated_length": 1135.852294921875, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.3315076846629381, + "epoch": 0.0156772530726138, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13466773927211761, + "learning_rate": 1e-06, + "loss": -0.0234, + "num_tokens": 45191827.0, + "reward": 0.27734375, + "reward_std": 0.2296258509159088, + "rewards/simpleverify_reward/mean": 0.27734375, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 92, + "tools/generated_tokens": 6101.10546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.23828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1620.7421875, + "completions/mean_terminated_length": 1151.4835205078125, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "entropy": 0.32753794454038143, + "epoch": 0.015847657997316123, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11930279433727264, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 45696193.0, + "reward": 0.3515625, + "reward_std": 0.19025646150112152, + "rewards/simpleverify_reward/mean": 0.3515625, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 93, + "tools/generated_tokens": 6428.75390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.34765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1418.53125, + "completions/mean_terminated_length": 1040.8499755859375, + "completions/min_length": 33.0, + "completions/min_terminated_length": 33.0, + "entropy": 0.336428202688694, + "epoch": 0.016018062922018447, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16664044559001923, + "learning_rate": 1e-06, + "loss": 0.0255, + "num_tokens": 46151977.0, + "reward": 0.4296875, + "reward_std": 0.27219003438949585, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 94, + "tools/generated_tokens": 5842.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1369.5859375, + "completions/mean_terminated_length": 1161.9132080078125, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.29961889889091253, + "epoch": 0.01618846784672077, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1664113849401474, + "learning_rate": 1e-06, + "loss": 0.0188, + "num_tokens": 46583567.0, + "reward": 0.45703125, + "reward_std": 0.30801716446876526, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 95, + "tools/generated_tokens": 5073.6015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1489.640625, + "completions/mean_terminated_length": 1048.4405517578125, + "completions/min_length": 43.0, + "completions/min_terminated_length": 43.0, + "entropy": 0.31038382835686207, + "epoch": 0.016358872771423096, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17741236090660095, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 47056003.0, + "reward": 0.25390625, + "reward_std": 0.2806849479675293, + "rewards/simpleverify_reward/mean": 0.25390625, + "rewards/simpleverify_reward/std": 0.4360972046852112, + "step": 96, + "tools/generated_tokens": 6121.66015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.26171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1624.23046875, + "completions/mean_terminated_length": 1024.556640625, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.33994131349027157, + "epoch": 0.01652927769612542, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.16670498251914978, + "learning_rate": 1e-06, + "loss": 0.0225, + "num_tokens": 47556878.0, + "reward": 0.3359375, + "reward_std": 0.11022830009460449, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 97, + "tools/generated_tokens": 6456.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1402.65625, + "completions/mean_terminated_length": 1076.188232421875, + "completions/min_length": 14.0, + "completions/min_terminated_length": 14.0, + "entropy": 0.3517347723245621, + "epoch": 0.01669968262082774, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14122840762138367, + "learning_rate": 1e-06, + "loss": 0.046, + "num_tokens": 47995766.0, + "reward": 0.4609375, + "reward_std": 0.16515429317951202, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 98, + "tools/generated_tokens": 5322.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1427.7109375, + "completions/mean_terminated_length": 1010.1372680664062, + "completions/min_length": 69.0, + "completions/min_terminated_length": 69.0, + "entropy": 0.3101299777626991, + "epoch": 0.016870087545530064, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1458555907011032, + "learning_rate": 1e-06, + "loss": -0.0055, + "num_tokens": 48452812.0, + "reward": 0.46484375, + "reward_std": 0.13466504216194153, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 99, + "tools/generated_tokens": 5587.72265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.03125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1206.11328125, + "completions/mean_terminated_length": 1006.8260498046875, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "entropy": 0.3743795230984688, + "epoch": 0.01704049247023239, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16655708849430084, + "learning_rate": 1e-06, + "loss": 0.0213, + "num_tokens": 48853001.0, + "reward": 0.43359375, + "reward_std": 0.24856583774089813, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 100, + "tools/generated_tokens": 5054.11328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1231.859375, + "completions/mean_terminated_length": 1048.3253173828125, + "completions/min_length": 12.0, + "completions/min_terminated_length": 12.0, + "entropy": 0.3789853770285845, + "epoch": 0.017210897394934713, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18441075086593628, + "learning_rate": 1e-06, + "loss": 0.0005, + "num_tokens": 49252037.0, + "reward": 0.390625, + "reward_std": 0.2340293675661087, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 101, + "tools/generated_tokens": 4663.87109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.67578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1294.41015625, + "completions/mean_terminated_length": 1010.8064575195312, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.3455806504935026, + "epoch": 0.017381302319637037, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.22286009788513184, + "learning_rate": 1e-06, + "loss": -0.0133, + "num_tokens": 49678414.0, + "reward": 0.4296875, + "reward_std": 0.3573821485042572, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 102, + "tools/generated_tokens": 5310.421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.33984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1427.609375, + "completions/mean_terminated_length": 1108.2366943359375, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.3169189915060997, + "epoch": 0.01755170724433936, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16372622549533844, + "learning_rate": 1e-06, + "loss": 0.0053, + "num_tokens": 50135642.0, + "reward": 0.484375, + "reward_std": 0.2323840707540512, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 103, + "tools/generated_tokens": 5515.625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.99609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1227.12890625, + "completions/mean_terminated_length": 1092.8045654296875, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.2742748726159334, + "epoch": 0.017722112169041685, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17461593449115753, + "learning_rate": 1e-06, + "loss": 0.0065, + "num_tokens": 50531243.0, + "reward": 0.60546875, + "reward_std": 0.2772725820541382, + "rewards/simpleverify_reward/mean": 0.60546875, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 104, + "tools/generated_tokens": 4291.1328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1237.78515625, + "completions/mean_terminated_length": 1021.1930541992188, + "completions/min_length": 23.0, + "completions/min_terminated_length": 23.0, + "entropy": 0.3275550380349159, + "epoch": 0.01789251709374401, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1666494458913803, + "learning_rate": 1e-06, + "loss": -0.0141, + "num_tokens": 50934276.0, + "reward": 0.3828125, + "reward_std": 0.2369977980852127, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 105, + "tools/generated_tokens": 4749.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1306.1484375, + "completions/mean_terminated_length": 1004.5164794921875, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.3629078324884176, + "epoch": 0.018062922018446333, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.14647015929222107, + "learning_rate": 1e-06, + "loss": 0.0131, + "num_tokens": 51352874.0, + "reward": 0.51171875, + "reward_std": 0.2669561505317688, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 106, + "tools/generated_tokens": 5010.16015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.49609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1560.33984375, + "completions/mean_terminated_length": 1080.2713623046875, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.34390639141201973, + "epoch": 0.018233326943148657, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14581336081027985, + "learning_rate": 1e-06, + "loss": 0.0334, + "num_tokens": 51848881.0, + "reward": 0.3203125, + "reward_std": 0.22075963020324707, + "rewards/simpleverify_reward/mean": 0.3203125, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 107, + "tools/generated_tokens": 6000.35546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.16796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1402.94921875, + "completions/mean_terminated_length": 1028.6605224609375, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.35339405201375484, + "epoch": 0.01840373186785098, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14013394713401794, + "learning_rate": 1e-06, + "loss": 0.0083, + "num_tokens": 52293972.0, + "reward": 0.46484375, + "reward_std": 0.17339344322681427, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 108, + "tools/generated_tokens": 5098.95703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1309.03125, + "completions/mean_terminated_length": 1002.8287963867188, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.31919316854327917, + "epoch": 0.018574136792553306, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15078844130039215, + "learning_rate": 1e-06, + "loss": 0.0357, + "num_tokens": 52722892.0, + "reward": 0.3984375, + "reward_std": 0.2612866461277008, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 109, + "tools/generated_tokens": 5701.0390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.14453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.33984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1321.078125, + "completions/mean_terminated_length": 946.8638916015625, + "completions/min_length": 64.0, + "completions/min_terminated_length": 64.0, + "entropy": 0.32734917663037777, + "epoch": 0.01874454171725563, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18454772233963013, + "learning_rate": 1e-06, + "loss": 0.0383, + "num_tokens": 53147040.0, + "reward": 0.30078125, + "reward_std": 0.23520077764987946, + "rewards/simpleverify_reward/mean": 0.30078125, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 110, + "tools/generated_tokens": 5673.10546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1435.4375, + "completions/mean_terminated_length": 935.8368530273438, + "completions/min_length": 21.0, + "completions/min_terminated_length": 21.0, + "entropy": 0.4320798348635435, + "epoch": 0.018914946641957954, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1485069841146469, + "learning_rate": 1e-06, + "loss": 0.0603, + "num_tokens": 53611456.0, + "reward": 0.32421875, + "reward_std": 0.1660325825214386, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 111, + "tools/generated_tokens": 5859.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2019.0, + "completions/mean_length": 1516.96875, + "completions/mean_terminated_length": 1116.876708984375, + "completions/min_length": 33.0, + "completions/min_terminated_length": 33.0, + "entropy": 0.323065472766757, + "epoch": 0.019085351566660278, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14810976386070251, + "learning_rate": 1e-06, + "loss": 0.023, + "num_tokens": 54085768.0, + "reward": 0.3046875, + "reward_std": 0.19531384110450745, + "rewards/simpleverify_reward/mean": 0.3046875, + "rewards/simpleverify_reward/std": 0.4611765742301941, + "step": 112, + "tools/generated_tokens": 5524.96875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.43359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1487.953125, + "completions/mean_terminated_length": 1059.248291015625, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.35362469032406807, + "epoch": 0.0192557564913626, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14924024045467377, + "learning_rate": 1e-06, + "loss": 0.0351, + "num_tokens": 54561228.0, + "reward": 0.30859375, + "reward_std": 0.26145052909851074, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 113, + "tools/generated_tokens": 6151.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.27734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1527.27734375, + "completions/mean_terminated_length": 1171.006591796875, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.3045827057212591, + "epoch": 0.019426161416064923, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15383483469486237, + "learning_rate": 1e-06, + "loss": 0.0278, + "num_tokens": 55037155.0, + "reward": 0.375, + "reward_std": 0.2668628990650177, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 114, + "tools/generated_tokens": 5895.28515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.1328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1442.7734375, + "completions/mean_terminated_length": 1091.5926513671875, + "completions/min_length": 20.0, + "completions/min_terminated_length": 20.0, + "entropy": 0.3237005192786455, + "epoch": 0.019596566340767247, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.14513492584228516, + "learning_rate": 1e-06, + "loss": 0.0432, + "num_tokens": 55487081.0, + "reward": 0.4296875, + "reward_std": 0.28388863801956177, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 115, + "tools/generated_tokens": 5282.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1450.76171875, + "completions/mean_terminated_length": 1055.1883544921875, + "completions/min_length": 28.0, + "completions/min_terminated_length": 28.0, + "entropy": 0.3501081932336092, + "epoch": 0.01976697126546957, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17928148806095123, + "learning_rate": 1e-06, + "loss": 0.0324, + "num_tokens": 55944428.0, + "reward": 0.40625, + "reward_std": 0.27346593141555786, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 116, + "tools/generated_tokens": 5874.76953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1071.96484375, + "completions/mean_terminated_length": 1015.4999389648438, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "entropy": 0.29012203868478537, + "epoch": 0.019937376190171895, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.24156872928142548, + "learning_rate": 1e-06, + "loss": -0.0235, + "num_tokens": 56293939.0, + "reward": 0.5390625, + "reward_std": 0.20067915320396423, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 117, + "tools/generated_tokens": 3511.9765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1999.0, + "completions/mean_length": 1561.28515625, + "completions/mean_terminated_length": 1158.0072021484375, + "completions/min_length": 12.0, + "completions/min_terminated_length": 12.0, + "entropy": 0.33826029673218727, + "epoch": 0.02010778111487422, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1409139335155487, + "learning_rate": 1e-06, + "loss": 0.0232, + "num_tokens": 56775820.0, + "reward": 0.30078125, + "reward_std": 0.23041339218616486, + "rewards/simpleverify_reward/mean": 0.30078125, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 118, + "tools/generated_tokens": 6257.2890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1306.16015625, + "completions/mean_terminated_length": 1130.5555419921875, + "completions/min_length": 15.0, + "completions/min_terminated_length": 15.0, + "entropy": 0.312307920306921, + "epoch": 0.020278186039576544, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14839047193527222, + "learning_rate": 1e-06, + "loss": -0.0031, + "num_tokens": 57196725.0, + "reward": 0.51953125, + "reward_std": 0.2231852114200592, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 119, + "tools/generated_tokens": 4626.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1988.0, + "completions/mean_length": 1559.515625, + "completions/mean_terminated_length": 1114.7835693359375, + "completions/min_length": 22.0, + "completions/min_terminated_length": 22.0, + "entropy": 0.3601351138204336, + "epoch": 0.020448590964278868, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16119074821472168, + "learning_rate": 1e-06, + "loss": 0.0125, + "num_tokens": 57681305.0, + "reward": 0.3828125, + "reward_std": 0.24304214119911194, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 120, + "tools/generated_tokens": 6255.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1400.7734375, + "completions/mean_terminated_length": 1111.8983154296875, + "completions/min_length": 8.0, + "completions/min_terminated_length": 8.0, + "entropy": 0.36240669898688793, + "epoch": 0.020618995888981192, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12988416850566864, + "learning_rate": 1e-06, + "loss": 0.0009, + "num_tokens": 58129647.0, + "reward": 0.2421875, + "reward_std": 0.15551914274692535, + "rewards/simpleverify_reward/mean": 0.2421875, + "rewards/simpleverify_reward/std": 0.4292463958263397, + "step": 121, + "tools/generated_tokens": 5552.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.02734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1336.1171875, + "completions/mean_terminated_length": 1018.3841552734375, + "completions/min_length": 21.0, + "completions/min_terminated_length": 21.0, + "entropy": 0.3691992927342653, + "epoch": 0.020789400813683516, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17354363203048706, + "learning_rate": 1e-06, + "loss": 0.0241, + "num_tokens": 58562637.0, + "reward": 0.453125, + "reward_std": 0.2553790807723999, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 122, + "tools/generated_tokens": 5216.140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.89453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1317.43359375, + "completions/mean_terminated_length": 997.2977905273438, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.34784369356930256, + "epoch": 0.02095980573838584, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.19032245874404907, + "learning_rate": 1e-06, + "loss": -0.0, + "num_tokens": 58985628.0, + "reward": 0.296875, + "reward_std": 0.29221853613853455, + "rewards/simpleverify_reward/mean": 0.296875, + "rewards/simpleverify_reward/std": 0.45777595043182373, + "step": 123, + "tools/generated_tokens": 5245.44921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.31640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1398.69921875, + "completions/mean_terminated_length": 1098.1656494140625, + "completions/min_length": 14.0, + "completions/min_terminated_length": 14.0, + "entropy": 0.3271794207394123, + "epoch": 0.021130210663088164, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1481109857559204, + "learning_rate": 1e-06, + "loss": 0.0452, + "num_tokens": 59430687.0, + "reward": 0.4375, + "reward_std": 0.26345524191856384, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 124, + "tools/generated_tokens": 5726.70703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.11328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1288.40234375, + "completions/mean_terminated_length": 943.14208984375, + "completions/min_length": 41.0, + "completions/min_terminated_length": 41.0, + "entropy": 0.3033269513398409, + "epoch": 0.02130061558779049, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14241783320903778, + "learning_rate": 1e-06, + "loss": 0.0136, + "num_tokens": 59849270.0, + "reward": 0.43359375, + "reward_std": 0.21863040328025818, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 125, + "tools/generated_tokens": 5112.4140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1443.78125, + "completions/mean_terminated_length": 1104.8353271484375, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.3240698855370283, + "epoch": 0.021471020512492812, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13430829346179962, + "learning_rate": 1e-06, + "loss": -0.017, + "num_tokens": 60294878.0, + "reward": 0.37109375, + "reward_std": 0.23877215385437012, + "rewards/simpleverify_reward/mean": 0.37109375, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 126, + "tools/generated_tokens": 5019.796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.74609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.31640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1436.2734375, + "completions/mean_terminated_length": 1153.142822265625, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "entropy": 0.3526413217186928, + "epoch": 0.021641425437195137, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13777115941047668, + "learning_rate": 1e-06, + "loss": 0.0343, + "num_tokens": 60749540.0, + "reward": 0.31640625, + "reward_std": 0.24117998778820038, + "rewards/simpleverify_reward/mean": 0.31640625, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 127, + "tools/generated_tokens": 5444.28515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.33203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1405.9296875, + "completions/mean_terminated_length": 1086.77783203125, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.32197364047169685, + "epoch": 0.021811830361897457, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.16866663098335266, + "learning_rate": 1e-06, + "loss": 0.0199, + "num_tokens": 61200162.0, + "reward": 0.44140625, + "reward_std": 0.32650285959243774, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 128, + "tools/generated_tokens": 5333.94140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1592.5, + "completions/mean_terminated_length": 1099.9755859375, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.33435916900634766, + "epoch": 0.02198223528659978, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.12271421402692795, + "learning_rate": 1e-06, + "loss": 0.064, + "num_tokens": 61697746.0, + "reward": 0.234375, + "reward_std": 0.19970625638961792, + "rewards/simpleverify_reward/mean": 0.234375, + "rewards/simpleverify_reward/std": 0.42443734407424927, + "step": 129, + "tools/generated_tokens": 6416.50390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1537.34375, + "completions/mean_terminated_length": 1269.857177734375, + "completions/min_length": 45.0, + "completions/min_terminated_length": 45.0, + "entropy": 0.30458197370171547, + "epoch": 0.022152640211302106, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1821925938129425, + "learning_rate": 1e-06, + "loss": 0.0457, + "num_tokens": 62171482.0, + "reward": 0.484375, + "reward_std": 0.26566585898399353, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 130, + "tools/generated_tokens": 5737.3515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.05078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1353.23828125, + "completions/mean_terminated_length": 1116.801025390625, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.28691938519477844, + "epoch": 0.02232304513600443, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13503485918045044, + "learning_rate": 1e-06, + "loss": -0.0048, + "num_tokens": 62599591.0, + "reward": 0.49609375, + "reward_std": 0.21896778047084808, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 131, + "tools/generated_tokens": 4673.25, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1494.06640625, + "completions/mean_terminated_length": 1246.83056640625, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "entropy": 0.2928379736840725, + "epoch": 0.022493450060706754, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11686106026172638, + "learning_rate": 1e-06, + "loss": -0.0004, + "num_tokens": 63053000.0, + "reward": 0.37890625, + "reward_std": 0.1468954086303711, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 132, + "tools/generated_tokens": 4662.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1466.23046875, + "completions/mean_terminated_length": 1122.9503173828125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "entropy": 0.32553007639944553, + "epoch": 0.022663854985409078, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13065077364444733, + "learning_rate": 1e-06, + "loss": 0.0307, + "num_tokens": 63515715.0, + "reward": 0.26953125, + "reward_std": 0.22028234601020813, + "rewards/simpleverify_reward/mean": 0.26953125, + "rewards/simpleverify_reward/std": 0.44458550214767456, + "step": 133, + "tools/generated_tokens": 5786.23828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1266.171875, + "completions/mean_terminated_length": 1099.4312744140625, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.3138121534138918, + "epoch": 0.022834259910111402, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.24351127445697784, + "learning_rate": 1e-06, + "loss": -0.0215, + "num_tokens": 63922639.0, + "reward": 0.52734375, + "reward_std": 0.20970112085342407, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 134, + "tools/generated_tokens": 4794.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.39453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1531.703125, + "completions/mean_terminated_length": 1195.322509765625, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.32346850633621216, + "epoch": 0.023004664834813726, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11104744672775269, + "learning_rate": 1e-06, + "loss": 0.037, + "num_tokens": 64404467.0, + "reward": 0.26171875, + "reward_std": 0.20310088992118835, + "rewards/simpleverify_reward/mean": 0.26171875, + "rewards/simpleverify_reward/std": 0.4404313564300537, + "step": 135, + "tools/generated_tokens": 5963.73046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.1640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1487.28515625, + "completions/mean_terminated_length": 1097.3973388671875, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.37246280163526535, + "epoch": 0.02317506975951605, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12094295769929886, + "learning_rate": 1e-06, + "loss": 0.0145, + "num_tokens": 64871292.0, + "reward": 0.390625, + "reward_std": 0.16691282391548157, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 136, + "tools/generated_tokens": 5767.296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.08984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1332.03125, + "completions/mean_terminated_length": 1088.3822021484375, + "completions/min_length": 28.0, + "completions/min_terminated_length": 28.0, + "entropy": 0.3254028670489788, + "epoch": 0.023345474684218374, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1289188116788864, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 65292916.0, + "reward": 0.3984375, + "reward_std": 0.17835843563079834, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 137, + "tools/generated_tokens": 4796.04296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1414.75390625, + "completions/mean_terminated_length": 1077.2755126953125, + "completions/min_length": 26.0, + "completions/min_terminated_length": 26.0, + "entropy": 0.2777953064069152, + "epoch": 0.0235158796089207, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12421082705259323, + "learning_rate": 1e-06, + "loss": 0.0173, + "num_tokens": 65737413.0, + "reward": 0.46484375, + "reward_std": 0.2040461003780365, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 138, + "tools/generated_tokens": 5150.765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.82421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1241.41796875, + "completions/mean_terminated_length": 972.5573120117188, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.314918152987957, + "epoch": 0.023686284533623023, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.20149211585521698, + "learning_rate": 1e-06, + "loss": 0.0769, + "num_tokens": 66141392.0, + "reward": 0.54296875, + "reward_std": 0.3109705150127411, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 139, + "tools/generated_tokens": 4761.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1349.16015625, + "completions/mean_terminated_length": 1048.5418701171875, + "completions/min_length": 30.0, + "completions/min_terminated_length": 30.0, + "entropy": 0.3121089041233063, + "epoch": 0.023856689458325347, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1447082757949829, + "learning_rate": 1e-06, + "loss": 0.0228, + "num_tokens": 66567529.0, + "reward": 0.38671875, + "reward_std": 0.2085040807723999, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 140, + "tools/generated_tokens": 5141.171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1472.1796875, + "completions/mean_terminated_length": 1058.671142578125, + "completions/min_length": 22.0, + "completions/min_terminated_length": 22.0, + "entropy": 0.3121817819774151, + "epoch": 0.02402709438302767, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17078803479671478, + "learning_rate": 1e-06, + "loss": 0.0383, + "num_tokens": 67032487.0, + "reward": 0.3984375, + "reward_std": 0.24063238501548767, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 141, + "tools/generated_tokens": 5712.18359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1251.36328125, + "completions/mean_terminated_length": 1076.8619384765625, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.3102445937693119, + "epoch": 0.024197499307729995, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.21668432652950287, + "learning_rate": 1e-06, + "loss": 0.0228, + "num_tokens": 67439636.0, + "reward": 0.57421875, + "reward_std": 0.34586799144744873, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 142, + "tools/generated_tokens": 4779.37109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1396.328125, + "completions/mean_terminated_length": 1049.030029296875, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "entropy": 0.3218431733548641, + "epoch": 0.024367904232432316, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14836956560611725, + "learning_rate": 1e-06, + "loss": 0.0293, + "num_tokens": 67886424.0, + "reward": 0.33984375, + "reward_std": 0.2125907838344574, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 143, + "tools/generated_tokens": 5436.33203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.97265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1389.7734375, + "completions/mean_terminated_length": 1122.1484375, + "completions/min_length": 7.0, + "completions/min_terminated_length": 7.0, + "entropy": 0.3523574620485306, + "epoch": 0.02453830915713464, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20129433274269104, + "learning_rate": 1e-06, + "loss": 0.0397, + "num_tokens": 68333518.0, + "reward": 0.42578125, + "reward_std": 0.3636796474456787, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 144, + "tools/generated_tokens": 5765.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.13671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1398.36328125, + "completions/mean_terminated_length": 1153.8763427734375, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "entropy": 0.2895941939204931, + "epoch": 0.024708714081836964, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18979892134666443, + "learning_rate": 1e-06, + "loss": -0.0006, + "num_tokens": 68777403.0, + "reward": 0.4921875, + "reward_std": 0.30592674016952515, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 145, + "tools/generated_tokens": 5462.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1372.703125, + "completions/mean_terminated_length": 993.8840942382812, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.3500816449522972, + "epoch": 0.024879119006539288, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1640709489583969, + "learning_rate": 1e-06, + "loss": -0.0275, + "num_tokens": 69222111.0, + "reward": 0.23828125, + "reward_std": 0.23751798272132874, + "rewards/simpleverify_reward/mean": 0.23828125, + "rewards/simpleverify_reward/std": 0.4268665909767151, + "step": 146, + "tools/generated_tokens": 5380.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1312.94140625, + "completions/mean_terminated_length": 1078.0257568359375, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.3114693034440279, + "epoch": 0.025049523931241612, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2001684457063675, + "learning_rate": 1e-06, + "loss": -0.0069, + "num_tokens": 69645680.0, + "reward": 0.5, + "reward_std": 0.2634032666683197, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 147, + "tools/generated_tokens": 5088.94921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.84375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1553.296875, + "completions/mean_terminated_length": 1266.25927734375, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.33421515114605427, + "epoch": 0.025219928855943936, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1346120834350586, + "learning_rate": 1e-06, + "loss": 0.0333, + "num_tokens": 70129516.0, + "reward": 0.33203125, + "reward_std": 0.25648343563079834, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 148, + "tools/generated_tokens": 5561.3125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2015.0, + "completions/mean_length": 1350.3125, + "completions/mean_terminated_length": 1021.5230102539062, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "entropy": 0.2868925202637911, + "epoch": 0.02539033378064626, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.14119477570056915, + "learning_rate": 1e-06, + "loss": 0.037, + "num_tokens": 70550860.0, + "reward": 0.41015625, + "reward_std": 0.1461106687784195, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 149, + "tools/generated_tokens": 4838.33203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.40625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1988.0, + "completions/mean_length": 1469.46875, + "completions/mean_terminated_length": 1073.63818359375, + "completions/min_length": 26.0, + "completions/min_terminated_length": 26.0, + "entropy": 0.3634101618081331, + "epoch": 0.025560738705348585, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1538143754005432, + "learning_rate": 1e-06, + "loss": 0.0085, + "num_tokens": 71014340.0, + "reward": 0.35546875, + "reward_std": 0.2559266984462738, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 150, + "tools/generated_tokens": 5749.46875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.08984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1345.05078125, + "completions/mean_terminated_length": 1064.6392822265625, + "completions/min_length": 104.0, + "completions/min_terminated_length": 104.0, + "entropy": 0.3609559182077646, + "epoch": 0.02573114363005091, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17284370958805084, + "learning_rate": 1e-06, + "loss": 0.0424, + "num_tokens": 71442577.0, + "reward": 0.44140625, + "reward_std": 0.22106516361236572, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 151, + "tools/generated_tokens": 5217.0546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1619.13671875, + "completions/mean_terminated_length": 1228.6865234375, + "completions/min_length": 8.0, + "completions/min_terminated_length": 8.0, + "entropy": 0.3010506443679333, + "epoch": 0.025901548554753233, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.11869866400957108, + "learning_rate": 1e-06, + "loss": 0.0247, + "num_tokens": 71944500.0, + "reward": 0.33203125, + "reward_std": 0.24579845368862152, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 152, + "tools/generated_tokens": 6203.140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.23828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1451.17578125, + "completions/mean_terminated_length": 1138.5595703125, + "completions/min_length": 100.0, + "completions/min_terminated_length": 100.0, + "entropy": 0.3158528581261635, + "epoch": 0.026071953479455557, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.09035536646842957, + "learning_rate": 1e-06, + "loss": 0.0407, + "num_tokens": 72399953.0, + "reward": 0.38671875, + "reward_std": 0.10409127175807953, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 153, + "tools/generated_tokens": 5299.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1399.078125, + "completions/mean_terminated_length": 1159.6417236328125, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.37316756322979927, + "epoch": 0.02624235840415788, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16092449426651, + "learning_rate": 1e-06, + "loss": 0.0123, + "num_tokens": 72848549.0, + "reward": 0.40234375, + "reward_std": 0.2811351418495178, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 154, + "tools/generated_tokens": 5431.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.96875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.51953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1629.77734375, + "completions/mean_terminated_length": 1177.5528564453125, + "completions/min_length": 8.0, + "completions/min_terminated_length": 8.0, + "entropy": 0.3274534326046705, + "epoch": 0.026412763328860205, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.09790311753749847, + "learning_rate": 1e-06, + "loss": 0.0033, + "num_tokens": 73351500.0, + "reward": 0.23828125, + "reward_std": 0.18463993072509766, + "rewards/simpleverify_reward/mean": 0.23828125, + "rewards/simpleverify_reward/std": 0.4268665909767151, + "step": 155, + "tools/generated_tokens": 6509.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.3828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1213.96875, + "completions/mean_terminated_length": 1059.5185546875, + "completions/min_length": 40.0, + "completions/min_terminated_length": 40.0, + "entropy": 0.28100949712097645, + "epoch": 0.02658316825356253, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.15801389515399933, + "learning_rate": 1e-06, + "loss": 0.0149, + "num_tokens": 73750612.0, + "reward": 0.453125, + "reward_std": 0.2885051667690277, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 156, + "tools/generated_tokens": 4717.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1998.0, + "completions/mean_length": 1363.63671875, + "completions/mean_terminated_length": 1154.142822265625, + "completions/min_length": 22.0, + "completions/min_terminated_length": 22.0, + "entropy": 0.30511037074029446, + "epoch": 0.026753573178264854, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14587201178073883, + "learning_rate": 1e-06, + "loss": -0.0028, + "num_tokens": 74186647.0, + "reward": 0.44140625, + "reward_std": 0.18435022234916687, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 157, + "tools/generated_tokens": 5235.65234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1257.265625, + "completions/mean_terminated_length": 1102.079345703125, + "completions/min_length": 38.0, + "completions/min_terminated_length": 38.0, + "entropy": 0.2574101975187659, + "epoch": 0.026923978102967174, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16285882890224457, + "learning_rate": 1e-06, + "loss": -0.011, + "num_tokens": 74583339.0, + "reward": 0.65625, + "reward_std": 0.18364217877388, + "rewards/simpleverify_reward/mean": 0.65625, + "rewards/simpleverify_reward/std": 0.47588926553726196, + "step": 158, + "tools/generated_tokens": 3905.28515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1388.25390625, + "completions/mean_terminated_length": 1114.884033203125, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.26684923097491264, + "epoch": 0.0270943830276695, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1531786024570465, + "learning_rate": 1e-06, + "loss": 0.0066, + "num_tokens": 75012652.0, + "reward": 0.40625, + "reward_std": 0.2531684637069702, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 159, + "tools/generated_tokens": 4580.2734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1539.57421875, + "completions/mean_terminated_length": 1131.4013671875, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.3059833236038685, + "epoch": 0.027264787952371822, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12701913714408875, + "learning_rate": 1e-06, + "loss": 0.0039, + "num_tokens": 75494751.0, + "reward": 0.35546875, + "reward_std": 0.16625863313674927, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 160, + "tools/generated_tokens": 5995.578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.17578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1390.21484375, + "completions/mean_terminated_length": 1180.0, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.3158688638359308, + "epoch": 0.027435192877074147, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.16224777698516846, + "learning_rate": 1e-06, + "loss": 0.0048, + "num_tokens": 75931430.0, + "reward": 0.3125, + "reward_std": 0.3364320993423462, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 161, + "tools/generated_tokens": 5494.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.00390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1517.30859375, + "completions/mean_terminated_length": 1018.7954711914062, + "completions/min_length": 7.0, + "completions/min_terminated_length": 7.0, + "entropy": 0.29948427714407444, + "epoch": 0.02760559780177647, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.16643038392066956, + "learning_rate": 1e-06, + "loss": 0.026, + "num_tokens": 76405829.0, + "reward": 0.33203125, + "reward_std": 0.19893452525138855, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 162, + "tools/generated_tokens": 6005.31640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1397.671875, + "completions/mean_terminated_length": 1143.2010498046875, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.3086371049284935, + "epoch": 0.027776002726478795, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14533433318138123, + "learning_rate": 1e-06, + "loss": 0.0239, + "num_tokens": 76848305.0, + "reward": 0.33203125, + "reward_std": 0.2884256839752197, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 163, + "tools/generated_tokens": 5277.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.89453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1483.92578125, + "completions/mean_terminated_length": 1198.5823974609375, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.2766151363030076, + "epoch": 0.02794640765118112, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17099782824516296, + "learning_rate": 1e-06, + "loss": 0.017, + "num_tokens": 77313134.0, + "reward": 0.41796875, + "reward_std": 0.2649644613265991, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 164, + "tools/generated_tokens": 5291.9453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.44140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1485.609375, + "completions/mean_terminated_length": 1041.2098388671875, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.3144151847809553, + "epoch": 0.028116812575883443, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1765998750925064, + "learning_rate": 1e-06, + "loss": 0.0347, + "num_tokens": 77787674.0, + "reward": 0.27734375, + "reward_std": 0.24800434708595276, + "rewards/simpleverify_reward/mean": 0.27734375, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 165, + "tools/generated_tokens": 6005.6171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1479.140625, + "completions/mean_terminated_length": 1211.063232421875, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.31386564671993256, + "epoch": 0.028287217500585767, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.15623007714748383, + "learning_rate": 1e-06, + "loss": 0.0348, + "num_tokens": 78249838.0, + "reward": 0.42578125, + "reward_std": 0.2801070213317871, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 166, + "tools/generated_tokens": 5519.1484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.97265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1282.81640625, + "completions/mean_terminated_length": 1022.4136352539062, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.36202933825552464, + "epoch": 0.02845762242528809, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.11055434495210648, + "learning_rate": 1e-06, + "loss": 0.0032, + "num_tokens": 78651695.0, + "reward": 0.328125, + "reward_std": 0.1186390072107315, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 167, + "tools/generated_tokens": 4554.82421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2009.0, + "completions/mean_length": 1410.91015625, + "completions/mean_terminated_length": 1175.834228515625, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.3066523037850857, + "epoch": 0.028628027349990415, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.14977262914180756, + "learning_rate": 1e-06, + "loss": 0.0274, + "num_tokens": 79099000.0, + "reward": 0.51171875, + "reward_std": 0.30647432804107666, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 168, + "tools/generated_tokens": 4818.9140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.37890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1478.41796875, + "completions/mean_terminated_length": 1130.943359375, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.2944907881319523, + "epoch": 0.02879843227469274, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14796318113803864, + "learning_rate": 1e-06, + "loss": 0.0249, + "num_tokens": 79562003.0, + "reward": 0.36328125, + "reward_std": 0.22028234601020813, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 169, + "tools/generated_tokens": 5750.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1346.90234375, + "completions/mean_terminated_length": 1077.83251953125, + "completions/min_length": 79.0, + "completions/min_terminated_length": 79.0, + "entropy": 0.33489724062383175, + "epoch": 0.028968837199395064, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.30806589126586914, + "learning_rate": 1e-06, + "loss": 0.0322, + "num_tokens": 80003418.0, + "reward": 0.3359375, + "reward_std": 0.34549540281295776, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 170, + "tools/generated_tokens": 5234.921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1499.08984375, + "completions/mean_terminated_length": 1211.5714111328125, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.3544781617820263, + "epoch": 0.029139242124097388, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16204816102981567, + "learning_rate": 1e-06, + "loss": 0.0064, + "num_tokens": 80472081.0, + "reward": 0.3984375, + "reward_std": 0.31676173210144043, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 171, + "tools/generated_tokens": 5459.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.93359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.36328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1476.9296875, + "completions/mean_terminated_length": 1151.104248046875, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "entropy": 0.34173163399100304, + "epoch": 0.029309647048799712, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1564481258392334, + "learning_rate": 1e-06, + "loss": 0.0499, + "num_tokens": 80943743.0, + "reward": 0.27734375, + "reward_std": 0.31761646270751953, + "rewards/simpleverify_reward/mean": 0.27734375, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 172, + "tools/generated_tokens": 6148.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1213.7734375, + "completions/mean_terminated_length": 1040.632080078125, + "completions/min_length": 71.0, + "completions/min_terminated_length": 71.0, + "entropy": 0.3498959634453058, + "epoch": 0.029480051973502033, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1797487586736679, + "learning_rate": 1e-06, + "loss": -0.011, + "num_tokens": 81338917.0, + "reward": 0.3984375, + "reward_std": 0.2843528985977173, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 173, + "tools/generated_tokens": 4613.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1352.1953125, + "completions/mean_terminated_length": 1052.8826904296875, + "completions/min_length": 79.0, + "completions/min_terminated_length": 79.0, + "entropy": 0.3132346123456955, + "epoch": 0.029650456898204357, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16205665469169617, + "learning_rate": 1e-06, + "loss": 0.0375, + "num_tokens": 81769623.0, + "reward": 0.40234375, + "reward_std": 0.26075831055641174, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 174, + "tools/generated_tokens": 5056.2109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1256.58984375, + "completions/mean_terminated_length": 1127.0863037109375, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.28759870771318674, + "epoch": 0.02982086182290668, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14383946359157562, + "learning_rate": 1e-06, + "loss": 0.0142, + "num_tokens": 82181790.0, + "reward": 0.44921875, + "reward_std": 0.1560128629207611, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 175, + "tools/generated_tokens": 4712.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2017.0, + "completions/mean_length": 1320.77734375, + "completions/mean_terminated_length": 1126.3812255859375, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.29212189465761185, + "epoch": 0.029991266747609005, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13572239875793457, + "learning_rate": 1e-06, + "loss": -0.0069, + "num_tokens": 82609605.0, + "reward": 0.49609375, + "reward_std": 0.19864007830619812, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 176, + "tools/generated_tokens": 4904.78125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1314.265625, + "completions/mean_terminated_length": 1136.1748046875, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "entropy": 0.3161185160279274, + "epoch": 0.03016167167231133, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.2562132477760315, + "learning_rate": 1e-06, + "loss": -0.0042, + "num_tokens": 83031017.0, + "reward": 0.5078125, + "reward_std": 0.3289920687675476, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 177, + "tools/generated_tokens": 4546.2734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1411.23828125, + "completions/mean_terminated_length": 1121.8125, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.34062889590859413, + "epoch": 0.030332076597013653, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.10277829319238663, + "learning_rate": 1e-06, + "loss": 0.0138, + "num_tokens": 83473494.0, + "reward": 0.33984375, + "reward_std": 0.10596734285354614, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 178, + "tools/generated_tokens": 4875.24609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1350.0703125, + "completions/mean_terminated_length": 1150.1708984375, + "completions/min_length": 46.0, + "completions/min_terminated_length": 46.0, + "entropy": 0.32410680316388607, + "epoch": 0.030502481521715977, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15116848051548004, + "learning_rate": 1e-06, + "loss": 0.0147, + "num_tokens": 83901896.0, + "reward": 0.41796875, + "reward_std": 0.21777918934822083, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 179, + "tools/generated_tokens": 4670.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1518.0859375, + "completions/mean_terminated_length": 1167.1038818359375, + "completions/min_length": 50.0, + "completions/min_terminated_length": 50.0, + "entropy": 0.36199636943638325, + "epoch": 0.0306728864464183, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13650768995285034, + "learning_rate": 1e-06, + "loss": 0.0, + "num_tokens": 84370446.0, + "reward": 0.31640625, + "reward_std": 0.1536140739917755, + "rewards/simpleverify_reward/mean": 0.31640625, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 180, + "tools/generated_tokens": 5438.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1356.03515625, + "completions/mean_terminated_length": 1162.2850341796875, + "completions/min_length": 83.0, + "completions/min_terminated_length": 83.0, + "entropy": 0.341150039806962, + "epoch": 0.030843291371120626, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1136154979467392, + "learning_rate": 1e-06, + "loss": 0.0224, + "num_tokens": 84797719.0, + "reward": 0.5234375, + "reward_std": 0.15325656533241272, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 181, + "tools/generated_tokens": 4316.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1319.46484375, + "completions/mean_terminated_length": 1219.0888671875, + "completions/min_length": 37.0, + "completions/min_terminated_length": 37.0, + "entropy": 0.2949541173875332, + "epoch": 0.03101369629582295, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15052412450313568, + "learning_rate": 1e-06, + "loss": 0.0026, + "num_tokens": 85216174.0, + "reward": 0.6171875, + "reward_std": 0.2403016835451126, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 182, + "tools/generated_tokens": 4583.46484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1295.13671875, + "completions/mean_terminated_length": 1116.9227294921875, + "completions/min_length": 116.0, + "completions/min_terminated_length": 116.0, + "entropy": 0.3051509000360966, + "epoch": 0.031184101220525274, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1615077406167984, + "learning_rate": 1e-06, + "loss": 0.0061, + "num_tokens": 85632577.0, + "reward": 0.51953125, + "reward_std": 0.27994656562805176, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 183, + "tools/generated_tokens": 4967.140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.79296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2010.0, + "completions/mean_length": 1411.5234375, + "completions/mean_terminated_length": 1229.2210693359375, + "completions/min_length": 64.0, + "completions/min_terminated_length": 64.0, + "entropy": 0.32412454672157764, + "epoch": 0.0313545061452276, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1103578731417656, + "learning_rate": 1e-06, + "loss": 0.0123, + "num_tokens": 86067063.0, + "reward": 0.3671875, + "reward_std": 0.16133463382720947, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 184, + "tools/generated_tokens": 4707.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1334.00390625, + "completions/mean_terminated_length": 1075.7552490234375, + "completions/min_length": 7.0, + "completions/min_terminated_length": 7.0, + "entropy": 0.32885063998401165, + "epoch": 0.03152491106992992, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17355377972126007, + "learning_rate": 1e-06, + "loss": -0.0016, + "num_tokens": 86491688.0, + "reward": 0.4453125, + "reward_std": 0.2305552214384079, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 185, + "tools/generated_tokens": 4966.02734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1390.4296875, + "completions/mean_terminated_length": 1162.0106201171875, + "completions/min_length": 41.0, + "completions/min_terminated_length": 41.0, + "entropy": 0.2966838479042053, + "epoch": 0.031695315994632246, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1571619063615799, + "learning_rate": 1e-06, + "loss": -0.011, + "num_tokens": 86927190.0, + "reward": 0.5078125, + "reward_std": 0.23590736091136932, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 186, + "tools/generated_tokens": 4670.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1164.3203125, + "completions/mean_terminated_length": 1093.4766845703125, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.3238255549222231, + "epoch": 0.03186572091933457, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17975440621376038, + "learning_rate": 1e-06, + "loss": -0.0092, + "num_tokens": 87310472.0, + "reward": 0.76171875, + "reward_std": 0.2568175494670868, + "rewards/simpleverify_reward/mean": 0.76171875, + "rewards/simpleverify_reward/std": 0.4268665909767151, + "step": 187, + "tools/generated_tokens": 4012.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1378.62109375, + "completions/mean_terminated_length": 1182.5404052734375, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.3143183123320341, + "epoch": 0.032036125844036895, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1601002961397171, + "learning_rate": 1e-06, + "loss": 0.0318, + "num_tokens": 87745127.0, + "reward": 0.453125, + "reward_std": 0.2835540473461151, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 188, + "tools/generated_tokens": 4778.6328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1188.34765625, + "completions/mean_terminated_length": 1029.15283203125, + "completions/min_length": 89.0, + "completions/min_terminated_length": 89.0, + "entropy": 0.3333643972873688, + "epoch": 0.032206530768739215, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.168931782245636, + "learning_rate": 1e-06, + "loss": 0.016, + "num_tokens": 88131792.0, + "reward": 0.48828125, + "reward_std": 0.302188515663147, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 189, + "tools/generated_tokens": 4556.34765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.64453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.31640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1523.765625, + "completions/mean_terminated_length": 1281.125732421875, + "completions/min_length": 127.0, + "completions/min_terminated_length": 127.0, + "entropy": 0.3226275350898504, + "epoch": 0.03237693569344154, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12362457811832428, + "learning_rate": 1e-06, + "loss": 0.0096, + "num_tokens": 88596084.0, + "reward": 0.3515625, + "reward_std": 0.13896197080612183, + "rewards/simpleverify_reward/mean": 0.3515625, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 190, + "tools/generated_tokens": 5195.7734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.79296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1998.0, + "completions/mean_length": 1396.05078125, + "completions/mean_terminated_length": 1125.91162109375, + "completions/min_length": 72.0, + "completions/min_terminated_length": 72.0, + "entropy": 0.3478570803999901, + "epoch": 0.032547340618143863, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18209584057331085, + "learning_rate": 1e-06, + "loss": 0.044, + "num_tokens": 89031665.0, + "reward": 0.4453125, + "reward_std": 0.2613418400287628, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 191, + "tools/generated_tokens": 5108.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1321.42578125, + "completions/mean_terminated_length": 1122.6119384765625, + "completions/min_length": 153.0, + "completions/min_terminated_length": 153.0, + "entropy": 0.29072050005197525, + "epoch": 0.03271774554284619, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16858156025409698, + "learning_rate": 1e-06, + "loss": 0.0301, + "num_tokens": 89470286.0, + "reward": 0.5234375, + "reward_std": 0.30222654342651367, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 192, + "tools/generated_tokens": 5169.43359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1489.44921875, + "completions/mean_terminated_length": 1176.1219482421875, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "entropy": 0.29124719835817814, + "epoch": 0.03288815046754851, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.27041056752204895, + "learning_rate": 1e-06, + "loss": 0.0084, + "num_tokens": 89932769.0, + "reward": 0.47265625, + "reward_std": 0.3046509623527527, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 193, + "tools/generated_tokens": 5553.44921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1351.3671875, + "completions/mean_terminated_length": 1123.9688720703125, + "completions/min_length": 49.0, + "completions/min_terminated_length": 49.0, + "entropy": 0.3492069635540247, + "epoch": 0.03305855539225084, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18806102871894836, + "learning_rate": 1e-06, + "loss": 0.0223, + "num_tokens": 90372047.0, + "reward": 0.5546875, + "reward_std": 0.2830354869365692, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 194, + "tools/generated_tokens": 5111.3671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1253.56640625, + "completions/mean_terminated_length": 1167.5887451171875, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.2873858269304037, + "epoch": 0.03322896031695316, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.14380556344985962, + "learning_rate": 1e-06, + "loss": 0.0323, + "num_tokens": 90775760.0, + "reward": 0.49609375, + "reward_std": 0.26275384426116943, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 195, + "tools/generated_tokens": 4005.5703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1291.87890625, + "completions/mean_terminated_length": 1139.2347412109375, + "completions/min_length": 83.0, + "completions/min_terminated_length": 83.0, + "entropy": 0.3549950644373894, + "epoch": 0.03339936524165548, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.1846829056739807, + "learning_rate": 1e-06, + "loss": 0.0129, + "num_tokens": 91187697.0, + "reward": 0.40234375, + "reward_std": 0.33350884914398193, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 196, + "tools/generated_tokens": 4331.8828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1270.66015625, + "completions/mean_terminated_length": 1139.3287353515625, + "completions/min_length": 112.0, + "completions/min_terminated_length": 112.0, + "entropy": 0.31027913466095924, + "epoch": 0.03356977016635781, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1424965262413025, + "learning_rate": 1e-06, + "loss": -0.0202, + "num_tokens": 91596890.0, + "reward": 0.5, + "reward_std": 0.1507449597120285, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 197, + "tools/generated_tokens": 4342.66015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.41796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2000.0, + "completions/mean_length": 1501.75, + "completions/mean_terminated_length": 1109.4765625, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.3565778099000454, + "epoch": 0.03374017509106013, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16838455200195312, + "learning_rate": 1e-06, + "loss": 0.0645, + "num_tokens": 92066794.0, + "reward": 0.359375, + "reward_std": 0.2964656949043274, + "rewards/simpleverify_reward/mean": 0.359375, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 198, + "tools/generated_tokens": 5725.75390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1269.703125, + "completions/mean_terminated_length": 1121.28369140625, + "completions/min_length": 75.0, + "completions/min_terminated_length": 75.0, + "entropy": 0.3502108883112669, + "epoch": 0.033910580015762457, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20792637765407562, + "learning_rate": 1e-06, + "loss": 0.0462, + "num_tokens": 92480238.0, + "reward": 0.5625, + "reward_std": 0.3204492926597595, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 199, + "tools/generated_tokens": 5061.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1424.1875, + "completions/mean_terminated_length": 1085.98193359375, + "completions/min_length": 239.0, + "completions/min_terminated_length": 239.0, + "entropy": 0.3356624115258455, + "epoch": 0.03408098494046478, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.24913813173770905, + "learning_rate": 1e-06, + "loss": 0.0232, + "num_tokens": 92933614.0, + "reward": 0.2890625, + "reward_std": 0.2746789753437042, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 200, + "tools/generated_tokens": 5440.19921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1463.45703125, + "completions/mean_terminated_length": 1050.38671875, + "completions/min_length": 53.0, + "completions/min_terminated_length": 53.0, + "entropy": 0.3088175095617771, + "epoch": 0.034251389865167105, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.15846951305866241, + "learning_rate": 1e-06, + "loss": 0.0449, + "num_tokens": 93392115.0, + "reward": 0.2890625, + "reward_std": 0.30979403853416443, + "rewards/simpleverify_reward/mean": 0.2890625, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 201, + "tools/generated_tokens": 5623.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.03125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1363.51953125, + "completions/mean_terminated_length": 1217.540283203125, + "completions/min_length": 61.0, + "completions/min_terminated_length": 61.0, + "entropy": 0.2520632538944483, + "epoch": 0.034421794789869425, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.12616130709648132, + "learning_rate": 1e-06, + "loss": -0.0291, + "num_tokens": 93819400.0, + "reward": 0.61328125, + "reward_std": 0.21367931365966797, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 202, + "tools/generated_tokens": 3867.53125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.22265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1993.0, + "completions/mean_length": 1499.68359375, + "completions/mean_terminated_length": 1159.594970703125, + "completions/min_length": 318.0, + "completions/min_terminated_length": 318.0, + "entropy": 0.34026093780994415, + "epoch": 0.03459219971457175, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1201467216014862, + "learning_rate": 1e-06, + "loss": 0.0179, + "num_tokens": 94287399.0, + "reward": 0.203125, + "reward_std": 0.17978152632713318, + "rewards/simpleverify_reward/mean": 0.203125, + "rewards/simpleverify_reward/std": 0.40311288833618164, + "step": 203, + "tools/generated_tokens": 5587.69921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.99609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1325.0, + "completions/mean_terminated_length": 1042.0870361328125, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.3588677067309618, + "epoch": 0.034762604639274074, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1788933426141739, + "learning_rate": 1e-06, + "loss": 0.0247, + "num_tokens": 94714951.0, + "reward": 0.32421875, + "reward_std": 0.2567910850048065, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 204, + "tools/generated_tokens": 4909.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1116.29296875, + "completions/mean_terminated_length": 1033.0340576171875, + "completions/min_length": 82.0, + "completions/min_terminated_length": 82.0, + "entropy": 0.31627057492733, + "epoch": 0.0349330095639764, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16822679340839386, + "learning_rate": 1e-06, + "loss": 0.0034, + "num_tokens": 95078178.0, + "reward": 0.6015625, + "reward_std": 0.28749823570251465, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 205, + "tools/generated_tokens": 3820.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1457.61328125, + "completions/mean_terminated_length": 1148.3690185546875, + "completions/min_length": 84.0, + "completions/min_terminated_length": 84.0, + "entropy": 0.29588034749031067, + "epoch": 0.03510341448867872, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14018484950065613, + "learning_rate": 1e-06, + "loss": -0.0083, + "num_tokens": 95551183.0, + "reward": 0.3828125, + "reward_std": 0.2541801333427429, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 206, + "tools/generated_tokens": 5465.62890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1238.25, + "completions/mean_terminated_length": 1092.7188720703125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "entropy": 0.3713560700416565, + "epoch": 0.03527381941338105, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15887637436389923, + "learning_rate": 1e-06, + "loss": -0.0072, + "num_tokens": 95948911.0, + "reward": 0.53515625, + "reward_std": 0.24932172894477844, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 207, + "tools/generated_tokens": 4262.25390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1590.796875, + "completions/mean_terminated_length": 1199.8551025390625, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "entropy": 0.38159251026809216, + "epoch": 0.03544422433808337, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16650021076202393, + "learning_rate": 1e-06, + "loss": 0.0254, + "num_tokens": 96445083.0, + "reward": 0.21875, + "reward_std": 0.26362934708595276, + "rewards/simpleverify_reward/mean": 0.21875, + "rewards/simpleverify_reward/std": 0.41420844197273254, + "step": 208, + "tools/generated_tokens": 6430.8046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1264.2421875, + "completions/mean_terminated_length": 1064.4608154296875, + "completions/min_length": 130.0, + "completions/min_terminated_length": 130.0, + "entropy": 0.27137147448956966, + "epoch": 0.0356146292627857, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1762692630290985, + "learning_rate": 1e-06, + "loss": 0.0067, + "num_tokens": 96849385.0, + "reward": 0.44921875, + "reward_std": 0.23018452525138855, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 209, + "tools/generated_tokens": 4240.25390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1158.6640625, + "completions/mean_terminated_length": 1022.45947265625, + "completions/min_length": 132.0, + "completions/min_terminated_length": 132.0, + "entropy": 0.2671739049255848, + "epoch": 0.03578503418748802, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.177192822098732, + "learning_rate": 1e-06, + "loss": 0.032, + "num_tokens": 97225027.0, + "reward": 0.359375, + "reward_std": 0.30830952525138855, + "rewards/simpleverify_reward/mean": 0.359375, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 210, + "tools/generated_tokens": 4030.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1336.9375, + "completions/mean_terminated_length": 1146.8663330078125, + "completions/min_length": 75.0, + "completions/min_terminated_length": 75.0, + "entropy": 0.29814364202320576, + "epoch": 0.03595543911219034, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1752936989068985, + "learning_rate": 1e-06, + "loss": 0.0314, + "num_tokens": 97642931.0, + "reward": 0.47265625, + "reward_std": 0.2892768681049347, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 211, + "tools/generated_tokens": 4336.94921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1254.73046875, + "completions/mean_terminated_length": 1172.6680908203125, + "completions/min_length": 287.0, + "completions/min_terminated_length": 287.0, + "entropy": 0.28304185532033443, + "epoch": 0.03612584403689267, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.18348625302314758, + "learning_rate": 1e-06, + "loss": 0.0053, + "num_tokens": 98053182.0, + "reward": 0.50390625, + "reward_std": 0.35764625668525696, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 212, + "tools/generated_tokens": 4198.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1418.8984375, + "completions/mean_terminated_length": 1200.373779296875, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.3266041036695242, + "epoch": 0.03629624896159499, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17745855450630188, + "learning_rate": 1e-06, + "loss": 0.0266, + "num_tokens": 98498468.0, + "reward": 0.4921875, + "reward_std": 0.2730247974395752, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 213, + "tools/generated_tokens": 4882.90625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1392.9921875, + "completions/mean_terminated_length": 1271.6990966796875, + "completions/min_length": 90.0, + "completions/min_terminated_length": 90.0, + "entropy": 0.28982884902507067, + "epoch": 0.036466653886297315, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15318572521209717, + "learning_rate": 1e-06, + "loss": 0.042, + "num_tokens": 98929938.0, + "reward": 0.58984375, + "reward_std": 0.2049104869365692, + "rewards/simpleverify_reward/mean": 0.58984375, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 214, + "tools/generated_tokens": 4409.0078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1411.09375, + "completions/mean_terminated_length": 1198.796875, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.3525862656533718, + "epoch": 0.036637058810999636, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15535373985767365, + "learning_rate": 1e-06, + "loss": -0.0059, + "num_tokens": 99368538.0, + "reward": 0.34375, + "reward_std": 0.17493700981140137, + "rewards/simpleverify_reward/mean": 0.34375, + "rewards/simpleverify_reward/std": 0.47588926553726196, + "step": 215, + "tools/generated_tokens": 4867.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1136.0234375, + "completions/mean_terminated_length": 1083.272705078125, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.2982969619333744, + "epoch": 0.03680746373570196, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16756686568260193, + "learning_rate": 1e-06, + "loss": 0.0036, + "num_tokens": 99744672.0, + "reward": 0.453125, + "reward_std": 0.316123366355896, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 216, + "tools/generated_tokens": 3888.01953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1372.48828125, + "completions/mean_terminated_length": 1179.010009765625, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.30694323405623436, + "epoch": 0.036977868660404284, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16647158563137054, + "learning_rate": 1e-06, + "loss": 0.0192, + "num_tokens": 100172189.0, + "reward": 0.4921875, + "reward_std": 0.2452508509159088, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 217, + "tools/generated_tokens": 4300.5, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1289.03125, + "completions/mean_terminated_length": 1066.70703125, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.3073802124708891, + "epoch": 0.03714827358510661, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13852950930595398, + "learning_rate": 1e-06, + "loss": 0.0238, + "num_tokens": 100583205.0, + "reward": 0.41796875, + "reward_std": 0.169600710272789, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 218, + "tools/generated_tokens": 4425.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1266.91015625, + "completions/mean_terminated_length": 1086.6634521484375, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.2961250003427267, + "epoch": 0.03731867850980893, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1482100486755371, + "learning_rate": 1e-06, + "loss": 0.0184, + "num_tokens": 100992926.0, + "reward": 0.5625, + "reward_std": 0.26278093457221985, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 219, + "tools/generated_tokens": 4330.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1334.78125, + "completions/mean_terminated_length": 1157.3463134765625, + "completions/min_length": 296.0, + "completions/min_terminated_length": 296.0, + "entropy": 0.35707367956638336, + "epoch": 0.03748908343451126, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17978939414024353, + "learning_rate": 1e-06, + "loss": 0.0039, + "num_tokens": 101421190.0, + "reward": 0.5703125, + "reward_std": 0.31071737408638, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 220, + "tools/generated_tokens": 5030.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1439.80859375, + "completions/mean_terminated_length": 1206.3946533203125, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.3434401638805866, + "epoch": 0.03765948835921358, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17241796851158142, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 101872101.0, + "reward": 0.46875, + "reward_std": 0.24075186252593994, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 221, + "tools/generated_tokens": 5247.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1464.3984375, + "completions/mean_terminated_length": 1304.7064208984375, + "completions/min_length": 216.0, + "completions/min_terminated_length": 216.0, + "entropy": 0.3227778486907482, + "epoch": 0.03782989328391591, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14050684869289398, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 102323163.0, + "reward": 0.53515625, + "reward_std": 0.24534353613853455, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 222, + "tools/generated_tokens": 4824.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1236.2109375, + "completions/mean_terminated_length": 971.2279663085938, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.30677394196391106, + "epoch": 0.03800029820861823, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14632734656333923, + "learning_rate": 1e-06, + "loss": 0.012, + "num_tokens": 102722961.0, + "reward": 0.3984375, + "reward_std": 0.21662378311157227, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 223, + "tools/generated_tokens": 4924.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1288.19921875, + "completions/mean_terminated_length": 1205.9697265625, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "entropy": 0.2895997706800699, + "epoch": 0.038170703133320556, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15224234759807587, + "learning_rate": 1e-06, + "loss": 0.0288, + "num_tokens": 103124916.0, + "reward": 0.5, + "reward_std": 0.23778533935546875, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 224, + "tools/generated_tokens": 3928.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1339.4375, + "completions/mean_terminated_length": 1154.4482421875, + "completions/min_length": 15.0, + "completions/min_terminated_length": 15.0, + "entropy": 0.29526018910109997, + "epoch": 0.03834110805802288, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17124171555042267, + "learning_rate": 1e-06, + "loss": 0.023, + "num_tokens": 103552052.0, + "reward": 0.53515625, + "reward_std": 0.27697813510894775, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 225, + "tools/generated_tokens": 4547.453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1242.125, + "completions/mean_terminated_length": 984.5824584960938, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "entropy": 0.32621590234339237, + "epoch": 0.0385115129827252, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19975169003009796, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 103957508.0, + "reward": 0.5, + "reward_std": 0.25395601987838745, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 226, + "tools/generated_tokens": 4698.13671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1308.50390625, + "completions/mean_terminated_length": 990.4022216796875, + "completions/min_length": 107.0, + "completions/min_terminated_length": 107.0, + "entropy": 0.3738710843026638, + "epoch": 0.038681917907427525, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14983998239040375, + "learning_rate": 1e-06, + "loss": 0.016, + "num_tokens": 104376181.0, + "reward": 0.22265625, + "reward_std": 0.1936889886856079, + "rewards/simpleverify_reward/mean": 0.22265625, + "rewards/simpleverify_reward/std": 0.41684433817863464, + "step": 227, + "tools/generated_tokens": 5020.5078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1273.3984375, + "completions/mean_terminated_length": 1071.16748046875, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.32002140395343304, + "epoch": 0.038852322832129846, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14927567541599274, + "learning_rate": 1e-06, + "loss": 0.0005, + "num_tokens": 104783547.0, + "reward": 0.515625, + "reward_std": 0.2427476942539215, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 228, + "tools/generated_tokens": 4609.40625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1447.65625, + "completions/mean_terminated_length": 1133.1905517578125, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.3097304329276085, + "epoch": 0.03902272775683217, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11527150869369507, + "learning_rate": 1e-06, + "loss": 0.0202, + "num_tokens": 105240195.0, + "reward": 0.390625, + "reward_std": 0.17829003930091858, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 229, + "tools/generated_tokens": 5359.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2021.0, + "completions/mean_length": 1183.796875, + "completions/mean_terminated_length": 1110.559326171875, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "entropy": 0.28834480978548527, + "epoch": 0.039193132681534494, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16462694108486176, + "learning_rate": 1e-06, + "loss": 0.0295, + "num_tokens": 105616607.0, + "reward": 0.5234375, + "reward_std": 0.266690731048584, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 230, + "tools/generated_tokens": 3559.796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1300.5546875, + "completions/mean_terminated_length": 1132.4688720703125, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.3693099822849035, + "epoch": 0.03936353760623682, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.19829939305782318, + "learning_rate": 1e-06, + "loss": 0.0239, + "num_tokens": 106034141.0, + "reward": 0.3125, + "reward_std": 0.30828261375427246, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 231, + "tools/generated_tokens": 4892.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1380.33203125, + "completions/mean_terminated_length": 1103.674072265625, + "completions/min_length": 201.0, + "completions/min_terminated_length": 201.0, + "entropy": 0.36126304790377617, + "epoch": 0.03953394253093914, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15957269072532654, + "learning_rate": 1e-06, + "loss": 0.0367, + "num_tokens": 106475794.0, + "reward": 0.30859375, + "reward_std": 0.25956130027770996, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 232, + "tools/generated_tokens": 5148.3359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.83984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1520.0546875, + "completions/mean_terminated_length": 1317.4378662109375, + "completions/min_length": 101.0, + "completions/min_terminated_length": 101.0, + "entropy": 0.27091052010655403, + "epoch": 0.03970434745564147, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1620144098997116, + "learning_rate": 1e-06, + "loss": 0.0082, + "num_tokens": 106929008.0, + "reward": 0.4296875, + "reward_std": 0.2632311284542084, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 233, + "tools/generated_tokens": 4392.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1309.3203125, + "completions/mean_terminated_length": 1164.3597412109375, + "completions/min_length": 151.0, + "completions/min_terminated_length": 151.0, + "entropy": 0.3271372374147177, + "epoch": 0.03987475238034379, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17116455733776093, + "learning_rate": 1e-06, + "loss": 0.0132, + "num_tokens": 107348258.0, + "reward": 0.484375, + "reward_std": 0.1986129879951477, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 234, + "tools/generated_tokens": 3965.33984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1294.58203125, + "completions/mean_terminated_length": 1083.625, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.32896302081644535, + "epoch": 0.04004515730504612, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12428409606218338, + "learning_rate": 1e-06, + "loss": 0.0203, + "num_tokens": 107764087.0, + "reward": 0.34765625, + "reward_std": 0.1701192855834961, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 235, + "tools/generated_tokens": 4750.58203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1486.9140625, + "completions/mean_terminated_length": 1193.011962890625, + "completions/min_length": 107.0, + "completions/min_terminated_length": 107.0, + "entropy": 0.3677659723907709, + "epoch": 0.04021556222974844, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13566716015338898, + "learning_rate": 1e-06, + "loss": 0.0076, + "num_tokens": 108226577.0, + "reward": 0.3359375, + "reward_std": 0.19036275148391724, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 236, + "tools/generated_tokens": 5510.91796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.96484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2000.0, + "completions/mean_length": 1409.0390625, + "completions/mean_terminated_length": 1217.685302734375, + "completions/min_length": 149.0, + "completions/min_terminated_length": 149.0, + "entropy": 0.31223051622509956, + "epoch": 0.040385967154450766, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16460387408733368, + "learning_rate": 1e-06, + "loss": 0.0088, + "num_tokens": 108679195.0, + "reward": 0.31640625, + "reward_std": 0.2826101779937744, + "rewards/simpleverify_reward/mean": 0.31640625, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 237, + "tools/generated_tokens": 5329.0546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1198.70703125, + "completions/mean_terminated_length": 1085.968994140625, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.33975757844746113, + "epoch": 0.04055637207915309, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13597051799297333, + "learning_rate": 1e-06, + "loss": 0.0111, + "num_tokens": 109059792.0, + "reward": 0.5390625, + "reward_std": 0.1668444126844406, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 238, + "tools/generated_tokens": 3646.7109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1248.859375, + "completions/mean_terminated_length": 1118.0908203125, + "completions/min_length": 77.0, + "completions/min_terminated_length": 77.0, + "entropy": 0.3333216030150652, + "epoch": 0.040726777003855415, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1780129075050354, + "learning_rate": 1e-06, + "loss": 0.0126, + "num_tokens": 109461260.0, + "reward": 0.625, + "reward_std": 0.291700541973114, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 239, + "tools/generated_tokens": 4096.86328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1264.7421875, + "completions/mean_terminated_length": 1088.602783203125, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.35745963640511036, + "epoch": 0.040897181928557735, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17514276504516602, + "learning_rate": 1e-06, + "loss": 0.0093, + "num_tokens": 109869178.0, + "reward": 0.3828125, + "reward_std": 0.29590702056884766, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 240, + "tools/generated_tokens": 4968.75, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1181.03515625, + "completions/mean_terminated_length": 1039.1680908203125, + "completions/min_length": 45.0, + "completions/min_terminated_length": 45.0, + "entropy": 0.3018411621451378, + "epoch": 0.041067586853260056, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.124259814620018, + "learning_rate": 1e-06, + "loss": 0.0151, + "num_tokens": 110277523.0, + "reward": 0.4609375, + "reward_std": 0.20026493072509766, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 241, + "tools/generated_tokens": 4117.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1209.26953125, + "completions/mean_terminated_length": 1067.566162109375, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.328345762565732, + "epoch": 0.041237991777962384, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1596774160861969, + "learning_rate": 1e-06, + "loss": 0.027, + "num_tokens": 110669880.0, + "reward": 0.421875, + "reward_std": 0.2532879114151001, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 242, + "tools/generated_tokens": 4409.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1407.734375, + "completions/mean_terminated_length": 1194.3177490234375, + "completions/min_length": 35.0, + "completions/min_terminated_length": 35.0, + "entropy": 0.34485598281025887, + "epoch": 0.041408396702664704, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13737072050571442, + "learning_rate": 1e-06, + "loss": -0.0124, + "num_tokens": 111114804.0, + "reward": 0.44921875, + "reward_std": 0.19643138349056244, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 243, + "tools/generated_tokens": 5007.74609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1274.12109375, + "completions/mean_terminated_length": 1090.932373046875, + "completions/min_length": 112.0, + "completions/min_terminated_length": 112.0, + "entropy": 0.2873286344110966, + "epoch": 0.04157880162736703, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14881965517997742, + "learning_rate": 1e-06, + "loss": 0.0225, + "num_tokens": 111529747.0, + "reward": 0.41796875, + "reward_std": 0.2261517196893692, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 244, + "tools/generated_tokens": 4586.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1275.21484375, + "completions/mean_terminated_length": 1028.2421875, + "completions/min_length": 87.0, + "completions/min_terminated_length": 87.0, + "entropy": 0.3055835347622633, + "epoch": 0.04174920655206935, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13444218039512634, + "learning_rate": 1e-06, + "loss": 0.0466, + "num_tokens": 111944778.0, + "reward": 0.34765625, + "reward_std": 0.23195403814315796, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 245, + "tools/generated_tokens": 4803.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1265.47265625, + "completions/mean_terminated_length": 1149.6727294921875, + "completions/min_length": 78.0, + "completions/min_terminated_length": 78.0, + "entropy": 0.3193060848861933, + "epoch": 0.04191961147677168, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16008260846138, + "learning_rate": 1e-06, + "loss": 0.0306, + "num_tokens": 112343955.0, + "reward": 0.5, + "reward_std": 0.2652543783187866, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 246, + "tools/generated_tokens": 4273.484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1287.78515625, + "completions/mean_terminated_length": 1112.3509521484375, + "completions/min_length": 82.0, + "completions/min_terminated_length": 82.0, + "entropy": 0.28172095213085413, + "epoch": 0.042090016401474, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16602839529514313, + "learning_rate": 1e-06, + "loss": 0.0289, + "num_tokens": 112749276.0, + "reward": 0.53515625, + "reward_std": 0.20536066591739655, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 247, + "tools/generated_tokens": 4207.7890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.42578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1444.0078125, + "completions/mean_terminated_length": 1193.73486328125, + "completions/min_length": 186.0, + "completions/min_terminated_length": 186.0, + "entropy": 0.35894401371479034, + "epoch": 0.04226042132617633, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1456543207168579, + "learning_rate": 1e-06, + "loss": 0.0298, + "num_tokens": 113198046.0, + "reward": 0.4609375, + "reward_std": 0.26840826869010925, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 248, + "tools/generated_tokens": 5084.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1265.109375, + "completions/mean_terminated_length": 1157.2445068359375, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.32613920606672764, + "epoch": 0.04243082625087865, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1564120352268219, + "learning_rate": 1e-06, + "loss": 0.0121, + "num_tokens": 113603210.0, + "reward": 0.52734375, + "reward_std": 0.25029462575912476, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 249, + "tools/generated_tokens": 4433.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1351.50390625, + "completions/mean_terminated_length": 1138.290771484375, + "completions/min_length": 179.0, + "completions/min_terminated_length": 179.0, + "entropy": 0.30417640320956707, + "epoch": 0.04260123117558098, + "frac_reward_zero_std": 0.25, + "grad_norm": 1.2026276588439941, + "learning_rate": 1e-06, + "loss": 0.0032, + "num_tokens": 114037643.0, + "reward": 0.43359375, + "reward_std": 0.27749860286712646, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 250, + "tools/generated_tokens": 4975.50390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1422.83203125, + "completions/mean_terminated_length": 1158.8778076171875, + "completions/min_length": 241.0, + "completions/min_terminated_length": 241.0, + "entropy": 0.3520346116274595, + "epoch": 0.0427716361002833, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12789323925971985, + "learning_rate": 1e-06, + "loss": 0.0174, + "num_tokens": 114485888.0, + "reward": 0.33984375, + "reward_std": 0.1596985161304474, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 251, + "tools/generated_tokens": 5254.828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1381.640625, + "completions/mean_terminated_length": 1120.896728515625, + "completions/min_length": 272.0, + "completions/min_terminated_length": 272.0, + "entropy": 0.3559390101581812, + "epoch": 0.042942041024985625, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.164140984416008, + "learning_rate": 1e-06, + "loss": 0.0717, + "num_tokens": 114925684.0, + "reward": 0.2734375, + "reward_std": 0.3424764573574066, + "rewards/simpleverify_reward/mean": 0.2734375, + "rewards/simpleverify_reward/std": 0.446596622467041, + "step": 252, + "tools/generated_tokens": 5661.6484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.08984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1376.078125, + "completions/mean_terminated_length": 1183.628173828125, + "completions/min_length": 204.0, + "completions/min_terminated_length": 204.0, + "entropy": 0.2860397193580866, + "epoch": 0.043112445949687946, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.15713639557361603, + "learning_rate": 1e-06, + "loss": 0.0416, + "num_tokens": 115359624.0, + "reward": 0.578125, + "reward_std": 0.33410364389419556, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 253, + "tools/generated_tokens": 4888.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1262.1171875, + "completions/mean_terminated_length": 1149.852783203125, + "completions/min_length": 144.0, + "completions/min_terminated_length": 144.0, + "entropy": 0.28291032928973436, + "epoch": 0.04328285087439027, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.170853853225708, + "learning_rate": 1e-06, + "loss": -0.0058, + "num_tokens": 115758870.0, + "reward": 0.48046875, + "reward_std": 0.28905272483825684, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 254, + "tools/generated_tokens": 4078.1328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.31640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1479.47265625, + "completions/mean_terminated_length": 1216.3314208984375, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.3204533886164427, + "epoch": 0.043453255799092594, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17299295961856842, + "learning_rate": 1e-06, + "loss": 0.0274, + "num_tokens": 116218159.0, + "reward": 0.4296875, + "reward_std": 0.33490437269210815, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 255, + "tools/generated_tokens": 5439.48046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.93359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1321.59765625, + "completions/mean_terminated_length": 1104.045654296875, + "completions/min_length": 51.0, + "completions/min_terminated_length": 51.0, + "entropy": 0.29396906588226557, + "epoch": 0.043623660723794914, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1581735461950302, + "learning_rate": 1e-06, + "loss": -0.0013, + "num_tokens": 116640632.0, + "reward": 0.4453125, + "reward_std": 0.2293090522289276, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 256, + "tools/generated_tokens": 4609.6171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.02734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1195.21484375, + "completions/mean_terminated_length": 1171.240966796875, + "completions/min_length": 43.0, + "completions/min_terminated_length": 43.0, + "entropy": 0.25802111998200417, + "epoch": 0.04379406564849724, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1504235863685608, + "learning_rate": 1e-06, + "loss": 0.0101, + "num_tokens": 117019567.0, + "reward": 0.77734375, + "reward_std": 0.199052095413208, + "rewards/simpleverify_reward/mean": 0.77734375, + "rewards/simpleverify_reward/std": 0.41684433817863464, + "step": 257, + "tools/generated_tokens": 3523.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.13671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1347.33203125, + "completions/mean_terminated_length": 1185.6395263671875, + "completions/min_length": 157.0, + "completions/min_terminated_length": 157.0, + "entropy": 0.2890857020393014, + "epoch": 0.04396447057319956, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14119593799114227, + "learning_rate": 1e-06, + "loss": 0.0315, + "num_tokens": 117440452.0, + "reward": 0.51171875, + "reward_std": 0.1848640739917755, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 258, + "tools/generated_tokens": 4323.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1279.1875, + "completions/mean_terminated_length": 1177.1326904296875, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.29742043651640415, + "epoch": 0.04413487549790189, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14363500475883484, + "learning_rate": 1e-06, + "loss": 0.0274, + "num_tokens": 117851124.0, + "reward": 0.51953125, + "reward_std": 0.3115956783294678, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 259, + "tools/generated_tokens": 4759.19921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1396.6953125, + "completions/mean_terminated_length": 1151.5806884765625, + "completions/min_length": 300.0, + "completions/min_terminated_length": 300.0, + "entropy": 0.33709784410893917, + "epoch": 0.04430528042260421, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15909504890441895, + "learning_rate": 1e-06, + "loss": 0.0154, + "num_tokens": 118284790.0, + "reward": 0.44140625, + "reward_std": 0.2815985083580017, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 260, + "tools/generated_tokens": 4908.703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1325.0078125, + "completions/mean_terminated_length": 1206.699951171875, + "completions/min_length": 153.0, + "completions/min_terminated_length": 153.0, + "entropy": 0.26144256815314293, + "epoch": 0.04447568534730654, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1711936742067337, + "learning_rate": 1e-06, + "loss": 0.002, + "num_tokens": 118696616.0, + "reward": 0.4296875, + "reward_std": 0.27056455612182617, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 261, + "tools/generated_tokens": 3893.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2012.0, + "completions/mean_length": 1281.9609375, + "completions/mean_terminated_length": 1127.3145751953125, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.2766091823577881, + "epoch": 0.04464609027200886, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13621380925178528, + "learning_rate": 1e-06, + "loss": -0.0175, + "num_tokens": 119113022.0, + "reward": 0.515625, + "reward_std": 0.25350111722946167, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 262, + "tools/generated_tokens": 4705.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1295.52734375, + "completions/mean_terminated_length": 1191.8577880859375, + "completions/min_length": 53.0, + "completions/min_terminated_length": 53.0, + "entropy": 0.2822153940796852, + "epoch": 0.04481649519671119, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17557425796985626, + "learning_rate": 1e-06, + "loss": 0.0015, + "num_tokens": 119526885.0, + "reward": 0.54296875, + "reward_std": 0.2985538840293884, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 263, + "tools/generated_tokens": 4239.546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1385.0703125, + "completions/mean_terminated_length": 1258.651123046875, + "completions/min_length": 62.0, + "completions/min_terminated_length": 62.0, + "entropy": 0.2646393794566393, + "epoch": 0.04498690012141351, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18296407163143158, + "learning_rate": 1e-06, + "loss": -0.0112, + "num_tokens": 119950071.0, + "reward": 0.44140625, + "reward_std": 0.18880821764469147, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 264, + "tools/generated_tokens": 4089.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1339.57421875, + "completions/mean_terminated_length": 1158.9951171875, + "completions/min_length": 183.0, + "completions/min_terminated_length": 183.0, + "entropy": 0.2932362789288163, + "epoch": 0.045157305046115835, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1839541345834732, + "learning_rate": 1e-06, + "loss": 0.0007, + "num_tokens": 120377594.0, + "reward": 0.4375, + "reward_std": 0.24978771805763245, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 265, + "tools/generated_tokens": 5059.59375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.81640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1197.7109375, + "completions/mean_terminated_length": 1109.75, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.31323915906250477, + "epoch": 0.045327709970818156, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1793917864561081, + "learning_rate": 1e-06, + "loss": 0.0171, + "num_tokens": 120757472.0, + "reward": 0.58984375, + "reward_std": 0.2542063593864441, + "rewards/simpleverify_reward/mean": 0.58984375, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 266, + "tools/generated_tokens": 3981.70703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1363.0703125, + "completions/mean_terminated_length": 1110.34228515625, + "completions/min_length": 79.0, + "completions/min_terminated_length": 79.0, + "entropy": 0.32600368186831474, + "epoch": 0.04549811489552048, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1349548101425171, + "learning_rate": 1e-06, + "loss": 0.0239, + "num_tokens": 121189682.0, + "reward": 0.33203125, + "reward_std": 0.16181382536888123, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 267, + "tools/generated_tokens": 4915.07421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1355.92578125, + "completions/mean_terminated_length": 1170.915771484375, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.30558702535927296, + "epoch": 0.045668519820222804, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17803844809532166, + "learning_rate": 1e-06, + "loss": 0.0096, + "num_tokens": 121616399.0, + "reward": 0.5234375, + "reward_std": 0.2926844358444214, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 268, + "tools/generated_tokens": 4883.92578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2019.0, + "completions/mean_length": 1369.9765625, + "completions/mean_terminated_length": 1175.768798828125, + "completions/min_length": 200.0, + "completions/min_terminated_length": 200.0, + "entropy": 0.30885729752480984, + "epoch": 0.04583892474492513, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1312689632177353, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 122049817.0, + "reward": 0.4609375, + "reward_std": 0.17436380684375763, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 269, + "tools/generated_tokens": 4601.98828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1314.4375, + "completions/mean_terminated_length": 1140.797119140625, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.2693759361281991, + "epoch": 0.04600932966962745, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15356102585792542, + "learning_rate": 1e-06, + "loss": 0.0375, + "num_tokens": 122466985.0, + "reward": 0.60546875, + "reward_std": 0.2802300453186035, + "rewards/simpleverify_reward/mean": 0.60546875, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 270, + "tools/generated_tokens": 4458.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1276.453125, + "completions/mean_terminated_length": 1050.4444580078125, + "completions/min_length": 37.0, + "completions/min_terminated_length": 37.0, + "entropy": 0.34354778937995434, + "epoch": 0.04617973459432977, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19494813680648804, + "learning_rate": 1e-06, + "loss": -0.002, + "num_tokens": 122878509.0, + "reward": 0.5078125, + "reward_std": 0.26198214292526245, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 271, + "tools/generated_tokens": 4668.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1996.0, + "completions/mean_length": 1332.9453125, + "completions/mean_terminated_length": 1094.59375, + "completions/min_length": 223.0, + "completions/min_terminated_length": 223.0, + "entropy": 0.30806681886315346, + "epoch": 0.0463501395190321, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.10586902499198914, + "learning_rate": 1e-06, + "loss": 0.0118, + "num_tokens": 123310511.0, + "reward": 0.265625, + "reward_std": 0.11840169876813889, + "rewards/simpleverify_reward/mean": 0.265625, + "rewards/simpleverify_reward/std": 0.4425306022167206, + "step": 272, + "tools/generated_tokens": 4540.95703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2009.0, + "completions/mean_length": 1262.984375, + "completions/mean_terminated_length": 1108.9158935546875, + "completions/min_length": 71.0, + "completions/min_terminated_length": 71.0, + "entropy": 0.29315103963017464, + "epoch": 0.04652054444373442, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15973858535289764, + "learning_rate": 1e-06, + "loss": 0.0439, + "num_tokens": 123710187.0, + "reward": 0.50390625, + "reward_std": 0.1848640739917755, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 273, + "tools/generated_tokens": 4246.9921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.45703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1299.25, + "completions/mean_terminated_length": 1156.465087890625, + "completions/min_length": 367.0, + "completions/min_terminated_length": 367.0, + "entropy": 0.294969892129302, + "epoch": 0.04669094936843675, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.16467413306236267, + "learning_rate": 1e-06, + "loss": 0.019, + "num_tokens": 124134523.0, + "reward": 0.6484375, + "reward_std": 0.3220454454421997, + "rewards/simpleverify_reward/mean": 0.6484375, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 274, + "tools/generated_tokens": 4451.25390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1439.390625, + "completions/mean_terminated_length": 1298.9423828125, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.2964180205017328, + "epoch": 0.04686135429313907, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18400876224040985, + "learning_rate": 1e-06, + "loss": -0.002, + "num_tokens": 124580303.0, + "reward": 0.390625, + "reward_std": 0.29364442825317383, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 275, + "tools/generated_tokens": 4655.40625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1279.59765625, + "completions/mean_terminated_length": 1203.746826171875, + "completions/min_length": 163.0, + "completions/min_terminated_length": 163.0, + "entropy": 0.24685709085315466, + "epoch": 0.0470317592178414, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11953554302453995, + "learning_rate": 1e-06, + "loss": -0.0009, + "num_tokens": 124993064.0, + "reward": 0.4765625, + "reward_std": 0.17396602034568787, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 276, + "tools/generated_tokens": 3983.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1259.859375, + "completions/mean_terminated_length": 1077.9808349609375, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.3116687685251236, + "epoch": 0.04720216414254372, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1639777272939682, + "learning_rate": 1e-06, + "loss": -0.0014, + "num_tokens": 125393876.0, + "reward": 0.5, + "reward_std": 0.19189241528511047, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 277, + "tools/generated_tokens": 4099.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.38671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1145.62890625, + "completions/mean_terminated_length": 1034.8114013671875, + "completions/min_length": 46.0, + "completions/min_terminated_length": 46.0, + "entropy": 0.31531943939626217, + "epoch": 0.047372569067246045, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19424794614315033, + "learning_rate": 1e-06, + "loss": 0.0082, + "num_tokens": 125762085.0, + "reward": 0.46484375, + "reward_std": 0.2993527054786682, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 278, + "tools/generated_tokens": 3817.6328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2015.0, + "completions/mean_length": 1205.4296875, + "completions/mean_terminated_length": 1110.1826171875, + "completions/min_length": 122.0, + "completions/min_terminated_length": 122.0, + "entropy": 0.305449353531003, + "epoch": 0.047542973991948366, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14293110370635986, + "learning_rate": 1e-06, + "loss": 0.0108, + "num_tokens": 126147811.0, + "reward": 0.56640625, + "reward_std": 0.18771302700042725, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 279, + "tools/generated_tokens": 3885.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1472.8828125, + "completions/mean_terminated_length": 1269.0052490234375, + "completions/min_length": 119.0, + "completions/min_terminated_length": 119.0, + "entropy": 0.31037183478474617, + "epoch": 0.047713378916650694, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1498546600341797, + "learning_rate": 1e-06, + "loss": 0.0302, + "num_tokens": 126616677.0, + "reward": 0.32421875, + "reward_std": 0.2544988691806793, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 280, + "tools/generated_tokens": 5512.90234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.97265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1246.5390625, + "completions/mean_terminated_length": 1127.937255859375, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.2865572739392519, + "epoch": 0.047883783841353014, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16287490725517273, + "learning_rate": 1e-06, + "loss": 0.0151, + "num_tokens": 127020431.0, + "reward": 0.63671875, + "reward_std": 0.2679290771484375, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 281, + "tools/generated_tokens": 4134.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1338.421875, + "completions/mean_terminated_length": 1166.1942138671875, + "completions/min_length": 49.0, + "completions/min_terminated_length": 49.0, + "entropy": 0.34245736710727215, + "epoch": 0.04805418876605534, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1597912758588791, + "learning_rate": 1e-06, + "loss": 0.022, + "num_tokens": 127439963.0, + "reward": 0.3125, + "reward_std": 0.20146670937538147, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 282, + "tools/generated_tokens": 4362.4375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1383.9453125, + "completions/mean_terminated_length": 1198.0150146484375, + "completions/min_length": 131.0, + "completions/min_terminated_length": 131.0, + "entropy": 0.3314568540081382, + "epoch": 0.04822459369075766, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.158244788646698, + "learning_rate": 1e-06, + "loss": 0.0546, + "num_tokens": 127886029.0, + "reward": 0.32421875, + "reward_std": 0.21116769313812256, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 283, + "tools/generated_tokens": 4943.96875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1267.23046875, + "completions/mean_terminated_length": 1175.1746826171875, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.271186844445765, + "epoch": 0.04839499861545999, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16395214200019836, + "learning_rate": 1e-06, + "loss": -0.013, + "num_tokens": 128295752.0, + "reward": 0.62109375, + "reward_std": 0.28898242115974426, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 284, + "tools/generated_tokens": 4019.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1187.234375, + "completions/mean_terminated_length": 1018.2990112304688, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.31557429023087025, + "epoch": 0.04856540354016231, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1610065996646881, + "learning_rate": 1e-06, + "loss": 0.0314, + "num_tokens": 128672052.0, + "reward": 0.48828125, + "reward_std": 0.2523331046104431, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 285, + "tools/generated_tokens": 4123.23828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1420.62109375, + "completions/mean_terminated_length": 1155.727783203125, + "completions/min_length": 137.0, + "completions/min_terminated_length": 137.0, + "entropy": 0.28226154297590256, + "epoch": 0.04873580846486463, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14174875617027283, + "learning_rate": 1e-06, + "loss": 0.0084, + "num_tokens": 129126259.0, + "reward": 0.37890625, + "reward_std": 0.2175418734550476, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 286, + "tools/generated_tokens": 5596.63671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1163.99609375, + "completions/mean_terminated_length": 1080.8846435546875, + "completions/min_length": 51.0, + "completions/min_terminated_length": 51.0, + "entropy": 0.2925511756911874, + "epoch": 0.04890621338956696, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.180791974067688, + "learning_rate": 1e-06, + "loss": 0.0197, + "num_tokens": 129510738.0, + "reward": 0.62109375, + "reward_std": 0.28455185890197754, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 287, + "tools/generated_tokens": 4236.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2000.0, + "completions/mean_length": 1299.37890625, + "completions/mean_terminated_length": 1192.43310546875, + "completions/min_length": 80.0, + "completions/min_terminated_length": 80.0, + "entropy": 0.2576394444331527, + "epoch": 0.04907661831426928, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13466109335422516, + "learning_rate": 1e-06, + "loss": 0.0274, + "num_tokens": 129930083.0, + "reward": 0.6484375, + "reward_std": 0.20960845053195953, + "rewards/simpleverify_reward/mean": 0.6484375, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 288, + "tools/generated_tokens": 3923.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1504.68359375, + "completions/mean_terminated_length": 1156.423095703125, + "completions/min_length": 197.0, + "completions/min_terminated_length": 197.0, + "entropy": 0.34600187093019485, + "epoch": 0.04924702323897161, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11934173852205276, + "learning_rate": 1e-06, + "loss": 0.0333, + "num_tokens": 130399650.0, + "reward": 0.375, + "reward_std": 0.19156451523303986, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 289, + "tools/generated_tokens": 5672.703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.03515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1407.26953125, + "completions/mean_terminated_length": 1116.0341796875, + "completions/min_length": 153.0, + "completions/min_terminated_length": 153.0, + "entropy": 0.2882102522999048, + "epoch": 0.04941742816367393, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1570221483707428, + "learning_rate": 1e-06, + "loss": 0.0354, + "num_tokens": 130845255.0, + "reward": 0.41796875, + "reward_std": 0.2548314929008484, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 290, + "tools/generated_tokens": 5023.28515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1287.3671875, + "completions/mean_terminated_length": 1098.1365966796875, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.2894864585250616, + "epoch": 0.049587833088376256, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14320386946201324, + "learning_rate": 1e-06, + "loss": 0.0227, + "num_tokens": 131256117.0, + "reward": 0.5546875, + "reward_std": 0.2568049728870392, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 291, + "tools/generated_tokens": 4663.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1262.24609375, + "completions/mean_terminated_length": 1133.6680908203125, + "completions/min_length": 157.0, + "completions/min_terminated_length": 157.0, + "entropy": 0.3162839636206627, + "epoch": 0.049758238013078576, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17364174127578735, + "learning_rate": 1e-06, + "loss": 0.0313, + "num_tokens": 131659396.0, + "reward": 0.27734375, + "reward_std": 0.2902497947216034, + "rewards/simpleverify_reward/mean": 0.27734375, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 292, + "tools/generated_tokens": 4454.24609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1156.20703125, + "completions/mean_terminated_length": 1037.827392578125, + "completions/min_length": 33.0, + "completions/min_terminated_length": 33.0, + "entropy": 0.29122194834053516, + "epoch": 0.049928642937780904, + "frac_reward_zero_std": 0.0625, + "grad_norm": 0.22128801047801971, + "learning_rate": 1e-06, + "loss": -0.0045, + "num_tokens": 132040713.0, + "reward": 0.48046875, + "reward_std": 0.3702397346496582, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 293, + "tools/generated_tokens": 4332.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1352.2890625, + "completions/mean_terminated_length": 1215.7523193359375, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.2850738409906626, + "epoch": 0.050099047862483224, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1403249055147171, + "learning_rate": 1e-06, + "loss": -0.0267, + "num_tokens": 132463282.0, + "reward": 0.51953125, + "reward_std": 0.2100876271724701, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 294, + "tools/generated_tokens": 4352.3046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1317.55859375, + "completions/mean_terminated_length": 1174.200927734375, + "completions/min_length": 84.0, + "completions/min_terminated_length": 84.0, + "entropy": 0.2910716813057661, + "epoch": 0.05026945278718555, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16851194202899933, + "learning_rate": 1e-06, + "loss": -0.0114, + "num_tokens": 132877361.0, + "reward": 0.51171875, + "reward_std": 0.28488922119140625, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 295, + "tools/generated_tokens": 4637.5703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1414.515625, + "completions/mean_terminated_length": 1207.7305908203125, + "completions/min_length": 90.0, + "completions/min_terminated_length": 90.0, + "entropy": 0.30923354625701904, + "epoch": 0.05043985771188787, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14795434474945068, + "learning_rate": 1e-06, + "loss": -0.006, + "num_tokens": 133320821.0, + "reward": 0.4375, + "reward_std": 0.22468777000904083, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 296, + "tools/generated_tokens": 4742.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1341.859375, + "completions/mean_terminated_length": 1166.190185546875, + "completions/min_length": 57.0, + "completions/min_terminated_length": 57.0, + "entropy": 0.3143516555428505, + "epoch": 0.0506102626365902, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16825224459171295, + "learning_rate": 1e-06, + "loss": -0.01, + "num_tokens": 133743697.0, + "reward": 0.53125, + "reward_std": 0.2893039882183075, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 297, + "tools/generated_tokens": 4789.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1227.21875, + "completions/mean_terminated_length": 1079.705078125, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.25937829725444317, + "epoch": 0.05078066756129252, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1845153272151947, + "learning_rate": 1e-06, + "loss": 0.0043, + "num_tokens": 134141257.0, + "reward": 0.47265625, + "reward_std": 0.21003374457359314, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 298, + "tools/generated_tokens": 4475.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1175.48046875, + "completions/mean_terminated_length": 1041.851318359375, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.2886015884578228, + "epoch": 0.05095107248599485, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13403676450252533, + "learning_rate": 1e-06, + "loss": 0.0062, + "num_tokens": 134514916.0, + "reward": 0.60546875, + "reward_std": 0.2085040807723999, + "rewards/simpleverify_reward/mean": 0.60546875, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 299, + "tools/generated_tokens": 3647.49609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2021.0, + "completions/mean_length": 1383.234375, + "completions/mean_terminated_length": 1161.6458740234375, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.32432376593351364, + "epoch": 0.05112147741069717, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17836932837963104, + "learning_rate": 1e-06, + "loss": 0.0567, + "num_tokens": 134950336.0, + "reward": 0.46484375, + "reward_std": 0.31528323888778687, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 300, + "tools/generated_tokens": 5023.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1390.16015625, + "completions/mean_terminated_length": 1170.8802490234375, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.28654346987605095, + "epoch": 0.05129188233539949, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.22838200628757477, + "learning_rate": 1e-06, + "loss": 0.0012, + "num_tokens": 135382921.0, + "reward": 0.3125, + "reward_std": 0.20687922835350037, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 301, + "tools/generated_tokens": 4806.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1296.2890625, + "completions/mean_terminated_length": 1090.6019287109375, + "completions/min_length": 39.0, + "completions/min_terminated_length": 39.0, + "entropy": 0.32894255965948105, + "epoch": 0.05146228726010182, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14717616140842438, + "learning_rate": 1e-06, + "loss": 0.0265, + "num_tokens": 135794131.0, + "reward": 0.44140625, + "reward_std": 0.21863040328025818, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 302, + "tools/generated_tokens": 4944.2890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.78125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1415.41796875, + "completions/mean_terminated_length": 1191.174560546875, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "entropy": 0.3031544340774417, + "epoch": 0.05163269218480414, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20804932713508606, + "learning_rate": 1e-06, + "loss": -0.008, + "num_tokens": 136254958.0, + "reward": 0.375, + "reward_std": 0.24233347177505493, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 303, + "tools/generated_tokens": 4863.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1237.66796875, + "completions/mean_terminated_length": 1187.232421875, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.26903535425662994, + "epoch": 0.051803097109506466, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16161197423934937, + "learning_rate": 1e-06, + "loss": 0.0379, + "num_tokens": 136645689.0, + "reward": 0.70703125, + "reward_std": 0.2602487802505493, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 304, + "tools/generated_tokens": 3581.6640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.14453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2014.0, + "completions/mean_length": 1308.9765625, + "completions/mean_terminated_length": 1232.52587890625, + "completions/min_length": 36.0, + "completions/min_terminated_length": 36.0, + "entropy": 0.3003286551684141, + "epoch": 0.051973502034208786, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16468428075313568, + "learning_rate": 1e-06, + "loss": 0.0019, + "num_tokens": 137054851.0, + "reward": 0.59765625, + "reward_std": 0.2183406949043274, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 305, + "tools/generated_tokens": 4068.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1354.546875, + "completions/mean_terminated_length": 1113.66845703125, + "completions/min_length": 159.0, + "completions/min_terminated_length": 159.0, + "entropy": 0.2759744944050908, + "epoch": 0.052143906958911114, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1579194813966751, + "learning_rate": 1e-06, + "loss": 0.0297, + "num_tokens": 137490847.0, + "reward": 0.375, + "reward_std": 0.256390780210495, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 306, + "tools/generated_tokens": 5098.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1155.61328125, + "completions/mean_terminated_length": 1063.29736328125, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.29237478971481323, + "epoch": 0.052314311883613435, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1777174174785614, + "learning_rate": 1e-06, + "loss": 0.0371, + "num_tokens": 137862716.0, + "reward": 0.64453125, + "reward_std": 0.2609933912754059, + "rewards/simpleverify_reward/mean": 0.64453125, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 307, + "tools/generated_tokens": 3939.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1302.82421875, + "completions/mean_terminated_length": 1064.685546875, + "completions/min_length": 21.0, + "completions/min_terminated_length": 21.0, + "entropy": 0.28632466681301594, + "epoch": 0.05248471680831576, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16957347095012665, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 138279983.0, + "reward": 0.50390625, + "reward_std": 0.2303449958562851, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 308, + "tools/generated_tokens": 4750.83984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1420.8125, + "completions/mean_terminated_length": 1170.6337890625, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.28520943596959114, + "epoch": 0.05265512173301808, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1310110241174698, + "learning_rate": 1e-06, + "loss": 0.0034, + "num_tokens": 138725119.0, + "reward": 0.45703125, + "reward_std": 0.23404711484909058, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 309, + "tools/generated_tokens": 5196.8359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.84375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1313.05078125, + "completions/mean_terminated_length": 1125.7156982421875, + "completions/min_length": 138.0, + "completions/min_terminated_length": 138.0, + "entropy": 0.36094664968550205, + "epoch": 0.05282552665772041, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18607859313488007, + "learning_rate": 1e-06, + "loss": 0.0292, + "num_tokens": 139150236.0, + "reward": 0.44921875, + "reward_std": 0.245716854929924, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 310, + "tools/generated_tokens": 4705.0625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1431.7578125, + "completions/mean_terminated_length": 1190.625, + "completions/min_length": 150.0, + "completions/min_terminated_length": 150.0, + "entropy": 0.339433029294014, + "epoch": 0.05299593158242273, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13819488883018494, + "learning_rate": 1e-06, + "loss": 0.0466, + "num_tokens": 139589838.0, + "reward": 0.37890625, + "reward_std": 0.20388561487197876, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 311, + "tools/generated_tokens": 4759.765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1355.0546875, + "completions/mean_terminated_length": 1124.078125, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.28534994274377823, + "epoch": 0.05316633650712506, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16902461647987366, + "learning_rate": 1e-06, + "loss": 0.0052, + "num_tokens": 140019132.0, + "reward": 0.44140625, + "reward_std": 0.26452332735061646, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 312, + "tools/generated_tokens": 4787.06640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.67578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1153.1875, + "completions/mean_terminated_length": 1064.8626708984375, + "completions/min_length": 137.0, + "completions/min_terminated_length": 137.0, + "entropy": 0.34434532187879086, + "epoch": 0.05333674143182738, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17630203068256378, + "learning_rate": 1e-06, + "loss": -0.0073, + "num_tokens": 140396508.0, + "reward": 0.4609375, + "reward_std": 0.23819956183433533, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 313, + "tools/generated_tokens": 4025.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1354.375, + "completions/mean_terminated_length": 1137.3948974609375, + "completions/min_length": 260.0, + "completions/min_terminated_length": 260.0, + "entropy": 0.32829746417701244, + "epoch": 0.05350714635652971, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17183540761470795, + "learning_rate": 1e-06, + "loss": 0.0291, + "num_tokens": 140831036.0, + "reward": 0.36328125, + "reward_std": 0.3206353783607483, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 314, + "tools/generated_tokens": 5274.375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1262.4140625, + "completions/mean_terminated_length": 1142.09912109375, + "completions/min_length": 122.0, + "completions/min_terminated_length": 122.0, + "entropy": 0.2923651207238436, + "epoch": 0.05367755128123203, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17488974332809448, + "learning_rate": 1e-06, + "loss": 0.0046, + "num_tokens": 141240950.0, + "reward": 0.50390625, + "reward_std": 0.326728880405426, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 315, + "tools/generated_tokens": 4366.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1423.34765625, + "completions/mean_terminated_length": 1188.263427734375, + "completions/min_length": 95.0, + "completions/min_terminated_length": 95.0, + "entropy": 0.27633984480053186, + "epoch": 0.05384795620593435, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12932582199573517, + "learning_rate": 1e-06, + "loss": 0.0071, + "num_tokens": 141686511.0, + "reward": 0.328125, + "reward_std": 0.19846853613853455, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 316, + "tools/generated_tokens": 5119.35546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1349.7265625, + "completions/mean_terminated_length": 1167.4285888671875, + "completions/min_length": 145.0, + "completions/min_terminated_length": 145.0, + "entropy": 0.29339468479156494, + "epoch": 0.054018361130636676, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.18806594610214233, + "learning_rate": 1e-06, + "loss": 0.0023, + "num_tokens": 142120489.0, + "reward": 0.4375, + "reward_std": 0.3388923406600952, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 317, + "tools/generated_tokens": 4981.75390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1343.5078125, + "completions/mean_terminated_length": 1256.9912109375, + "completions/min_length": 1.0, + "completions/min_terminated_length": 1.0, + "entropy": 0.26533154770731926, + "epoch": 0.054188766055339, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12821224331855774, + "learning_rate": 1e-06, + "loss": 0.0102, + "num_tokens": 142529371.0, + "reward": 0.50390625, + "reward_std": 0.15613234043121338, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 318, + "tools/generated_tokens": 3607.51171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.10546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1137.10546875, + "completions/mean_terminated_length": 1011.6044921875, + "completions/min_length": 39.0, + "completions/min_terminated_length": 39.0, + "entropy": 0.2547017401084304, + "epoch": 0.054359170980041324, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1771804690361023, + "learning_rate": 1e-06, + "loss": 0.0389, + "num_tokens": 142895526.0, + "reward": 0.609375, + "reward_std": 0.23283424973487854, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 319, + "tools/generated_tokens": 3753.109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.27734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1260.55078125, + "completions/mean_terminated_length": 1135.8416748046875, + "completions/min_length": 75.0, + "completions/min_terminated_length": 75.0, + "entropy": 0.27737378515303135, + "epoch": 0.054529575904743645, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1308823972940445, + "learning_rate": 1e-06, + "loss": 0.0094, + "num_tokens": 143295379.0, + "reward": 0.3984375, + "reward_std": 0.16691282391548157, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 320, + "tools/generated_tokens": 4052.55859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1292.5546875, + "completions/mean_terminated_length": 1217.9827880859375, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.29106081649661064, + "epoch": 0.05469998082944597, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.1829807162284851, + "learning_rate": 1e-06, + "loss": 0.0172, + "num_tokens": 143706257.0, + "reward": 0.40234375, + "reward_std": 0.36748284101486206, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 321, + "tools/generated_tokens": 4348.55859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1392.87890625, + "completions/mean_terminated_length": 1241.6971435546875, + "completions/min_length": 85.0, + "completions/min_terminated_length": 85.0, + "entropy": 0.29241783916950226, + "epoch": 0.05487038575414829, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1687900274991989, + "learning_rate": 1e-06, + "loss": 0.0421, + "num_tokens": 144143938.0, + "reward": 0.46875, + "reward_std": 0.29602646827697754, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 322, + "tools/generated_tokens": 4840.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1342.921875, + "completions/mean_terminated_length": 1158.84228515625, + "completions/min_length": 43.0, + "completions/min_terminated_length": 43.0, + "entropy": 0.3193067070096731, + "epoch": 0.05504079067885062, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19132868945598602, + "learning_rate": 1e-06, + "loss": 0.017, + "num_tokens": 144585150.0, + "reward": 0.4296875, + "reward_std": 0.2655054032802582, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 323, + "tools/generated_tokens": 4814.94140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1299.42578125, + "completions/mean_terminated_length": 1203.7928466796875, + "completions/min_length": 101.0, + "completions/min_terminated_length": 101.0, + "entropy": 0.3053978104144335, + "epoch": 0.05521119560355294, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16768132150173187, + "learning_rate": 1e-06, + "loss": 0.0082, + "num_tokens": 144995419.0, + "reward": 0.34765625, + "reward_std": 0.24562345445156097, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 324, + "tools/generated_tokens": 4115.43359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1207.55078125, + "completions/mean_terminated_length": 1083.1839599609375, + "completions/min_length": 54.0, + "completions/min_terminated_length": 54.0, + "entropy": 0.31183927692472935, + "epoch": 0.05538160052825527, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17047731578350067, + "learning_rate": 1e-06, + "loss": 0.0214, + "num_tokens": 145393080.0, + "reward": 0.63671875, + "reward_std": 0.27880415320396423, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 325, + "tools/generated_tokens": 4255.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1242.26171875, + "completions/mean_terminated_length": 1162.73388671875, + "completions/min_length": 252.0, + "completions/min_terminated_length": 252.0, + "entropy": 0.2630101628601551, + "epoch": 0.05555200545295759, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14168424904346466, + "learning_rate": 1e-06, + "loss": 0.0229, + "num_tokens": 145800203.0, + "reward": 0.6171875, + "reward_std": 0.20215418934822083, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 326, + "tools/generated_tokens": 3938.27734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1313.8046875, + "completions/mean_terminated_length": 1063.952880859375, + "completions/min_length": 144.0, + "completions/min_terminated_length": 144.0, + "entropy": 0.35248119942843914, + "epoch": 0.05572241037765992, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1601688712835312, + "learning_rate": 1e-06, + "loss": 0.0226, + "num_tokens": 146218825.0, + "reward": 0.35546875, + "reward_std": 0.24520954489707947, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 327, + "tools/generated_tokens": 4833.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1230.7734375, + "completions/mean_terminated_length": 1097.04541015625, + "completions/min_length": 115.0, + "completions/min_terminated_length": 115.0, + "entropy": 0.30171683616936207, + "epoch": 0.05589281530236224, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16057145595550537, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 146616991.0, + "reward": 0.39453125, + "reward_std": 0.22226692736148834, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 328, + "tools/generated_tokens": 4422.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1200.1953125, + "completions/mean_terminated_length": 1143.675048828125, + "completions/min_length": 8.0, + "completions/min_terminated_length": 8.0, + "entropy": 0.284699235111475, + "epoch": 0.056063220227064565, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15954619646072388, + "learning_rate": 1e-06, + "loss": -0.0055, + "num_tokens": 147005265.0, + "reward": 0.58203125, + "reward_std": 0.2673723101615906, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 329, + "tools/generated_tokens": 3824.19921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1379.62890625, + "completions/mean_terminated_length": 1252.172119140625, + "completions/min_length": 154.0, + "completions/min_terminated_length": 154.0, + "entropy": 0.3053628709167242, + "epoch": 0.056233625151766886, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16071152687072754, + "learning_rate": 1e-06, + "loss": 0.0109, + "num_tokens": 147438146.0, + "reward": 0.609375, + "reward_std": 0.304276704788208, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 330, + "tools/generated_tokens": 4427.625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.36328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2011.0, + "completions/mean_length": 1453.5, + "completions/mean_terminated_length": 1114.343505859375, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.34157600067555904, + "epoch": 0.05640403007646921, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13115476071834564, + "learning_rate": 1e-06, + "loss": 0.0216, + "num_tokens": 147895138.0, + "reward": 0.3671875, + "reward_std": 0.2007330358028412, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 331, + "tools/generated_tokens": 5541.52734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.99609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1347.921875, + "completions/mean_terminated_length": 1094.718017578125, + "completions/min_length": 169.0, + "completions/min_terminated_length": 169.0, + "entropy": 0.31185402534902096, + "epoch": 0.056574435001171534, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16882377862930298, + "learning_rate": 1e-06, + "loss": 0.031, + "num_tokens": 148331582.0, + "reward": 0.265625, + "reward_std": 0.18420085310935974, + "rewards/simpleverify_reward/mean": 0.265625, + "rewards/simpleverify_reward/std": 0.4425306022167206, + "step": 332, + "tools/generated_tokens": 5195.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1307.73046875, + "completions/mean_terminated_length": 1109.836669921875, + "completions/min_length": 28.0, + "completions/min_terminated_length": 28.0, + "entropy": 0.29180445708334446, + "epoch": 0.056744839925873855, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17275694012641907, + "learning_rate": 1e-06, + "loss": 0.0276, + "num_tokens": 148750601.0, + "reward": 0.53515625, + "reward_std": 0.2933111786842346, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 333, + "tools/generated_tokens": 4939.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1329.640625, + "completions/mean_terminated_length": 1176.4407958984375, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.2849424909800291, + "epoch": 0.05691524485057618, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1704791635274887, + "learning_rate": 1e-06, + "loss": 0.0101, + "num_tokens": 149177085.0, + "reward": 0.4921875, + "reward_std": 0.25355497002601624, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 334, + "tools/generated_tokens": 4553.65234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1286.7890625, + "completions/mean_terminated_length": 1038.3211669921875, + "completions/min_length": 405.0, + "completions/min_terminated_length": 405.0, + "entropy": 0.33647651597857475, + "epoch": 0.0570856497752785, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1627286821603775, + "learning_rate": 1e-06, + "loss": 0.0078, + "num_tokens": 149586551.0, + "reward": 0.26171875, + "reward_std": 0.22039085626602173, + "rewards/simpleverify_reward/mean": 0.26171875, + "rewards/simpleverify_reward/std": 0.4404313564300537, + "step": 335, + "tools/generated_tokens": 4806.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1992.0, + "completions/mean_length": 1180.13671875, + "completions/mean_terminated_length": 1056.15625, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.2707588989287615, + "epoch": 0.05725605469998083, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20591795444488525, + "learning_rate": 1e-06, + "loss": 0.0251, + "num_tokens": 149969866.0, + "reward": 0.5, + "reward_std": 0.30712568759918213, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 336, + "tools/generated_tokens": 4036.1484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1382.7890625, + "completions/mean_terminated_length": 1209.1280517578125, + "completions/min_length": 202.0, + "completions/min_terminated_length": 202.0, + "entropy": 0.31500553060323, + "epoch": 0.05742645962468315, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.09929162263870239, + "learning_rate": 1e-06, + "loss": -0.0164, + "num_tokens": 150400244.0, + "reward": 0.46484375, + "reward_std": 0.11039985716342926, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 337, + "tools/generated_tokens": 4390.8046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1521.7578125, + "completions/mean_terminated_length": 1319.810791015625, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.2835215609520674, + "epoch": 0.05759686454938548, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11700831353664398, + "learning_rate": 1e-06, + "loss": 0.0187, + "num_tokens": 150866198.0, + "reward": 0.46875, + "reward_std": 0.16707327961921692, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 338, + "tools/generated_tokens": 4929.7734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1209.3984375, + "completions/mean_terminated_length": 1072.1727294921875, + "completions/min_length": 166.0, + "completions/min_terminated_length": 166.0, + "entropy": 0.2663586363196373, + "epoch": 0.0577672694740878, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16561011970043182, + "learning_rate": 1e-06, + "loss": 0.0185, + "num_tokens": 151251996.0, + "reward": 0.5234375, + "reward_std": 0.2843548059463501, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 339, + "tools/generated_tokens": 4169.421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1271.2734375, + "completions/mean_terminated_length": 1160.3125, + "completions/min_length": 131.0, + "completions/min_terminated_length": 131.0, + "entropy": 0.28707977943122387, + "epoch": 0.05793767439879013, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12584614753723145, + "learning_rate": 1e-06, + "loss": 0.0019, + "num_tokens": 151667730.0, + "reward": 0.5703125, + "reward_std": 0.13149452209472656, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 340, + "tools/generated_tokens": 3807.27734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.23828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1407.93359375, + "completions/mean_terminated_length": 1244.7843017578125, + "completions/min_length": 185.0, + "completions/min_terminated_length": 185.0, + "entropy": 0.25658425129950047, + "epoch": 0.05810807932349245, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11719018220901489, + "learning_rate": 1e-06, + "loss": 0.0047, + "num_tokens": 152114033.0, + "reward": 0.49609375, + "reward_std": 0.15834102034568787, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 341, + "tools/generated_tokens": 4631.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1293.5234375, + "completions/mean_terminated_length": 1145.4532470703125, + "completions/min_length": 115.0, + "completions/min_terminated_length": 115.0, + "entropy": 0.3050544150173664, + "epoch": 0.058278484248194776, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1705986112356186, + "learning_rate": 1e-06, + "loss": 0.0077, + "num_tokens": 152524375.0, + "reward": 0.265625, + "reward_std": 0.2792971134185791, + "rewards/simpleverify_reward/mean": 0.265625, + "rewards/simpleverify_reward/std": 0.4425306022167206, + "step": 342, + "tools/generated_tokens": 4125.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1317.09375, + "completions/mean_terminated_length": 1152.7320556640625, + "completions/min_length": 269.0, + "completions/min_terminated_length": 269.0, + "entropy": 0.2780300956219435, + "epoch": 0.058448889172897096, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17361460626125336, + "learning_rate": 1e-06, + "loss": 0.0221, + "num_tokens": 152937135.0, + "reward": 0.578125, + "reward_std": 0.30502164363861084, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 343, + "tools/generated_tokens": 4517.109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1232.47265625, + "completions/mean_terminated_length": 1090.31640625, + "completions/min_length": 41.0, + "completions/min_terminated_length": 41.0, + "entropy": 0.2694319849833846, + "epoch": 0.058619294097599424, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.2018771916627884, + "learning_rate": 1e-06, + "loss": 0.0195, + "num_tokens": 153338744.0, + "reward": 0.6171875, + "reward_std": 0.3160597085952759, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 344, + "tools/generated_tokens": 4336.47265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1309.08203125, + "completions/mean_terminated_length": 1180.27978515625, + "completions/min_length": 158.0, + "completions/min_terminated_length": 158.0, + "entropy": 0.3142383638769388, + "epoch": 0.058789699022301745, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16071529686450958, + "learning_rate": 1e-06, + "loss": 0.0328, + "num_tokens": 153758045.0, + "reward": 0.55078125, + "reward_std": 0.27139341831207275, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 345, + "tools/generated_tokens": 4269.0859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1481.47265625, + "completions/mean_terminated_length": 1194.8883056640625, + "completions/min_length": 200.0, + "completions/min_terminated_length": 200.0, + "entropy": 0.2590813608840108, + "epoch": 0.058960103947004065, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13019989430904388, + "learning_rate": 1e-06, + "loss": 0.0193, + "num_tokens": 154212246.0, + "reward": 0.390625, + "reward_std": 0.2226376086473465, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 346, + "tools/generated_tokens": 5049.48046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1335.6171875, + "completions/mean_terminated_length": 1265.296142578125, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.28239644318819046, + "epoch": 0.05913050887170639, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.13273970782756805, + "learning_rate": 1e-06, + "loss": 0.0063, + "num_tokens": 154631492.0, + "reward": 0.515625, + "reward_std": 0.12136821448802948, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 347, + "tools/generated_tokens": 4191.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1313.88671875, + "completions/mean_terminated_length": 1216.4556884765625, + "completions/min_length": 301.0, + "completions/min_terminated_length": 301.0, + "entropy": 0.290899645537138, + "epoch": 0.059300913796408714, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17403538525104523, + "learning_rate": 1e-06, + "loss": 0.0375, + "num_tokens": 155045191.0, + "reward": 0.546875, + "reward_std": 0.26983416080474854, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 348, + "tools/generated_tokens": 4561.92578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1295.77734375, + "completions/mean_terminated_length": 1210.743408203125, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.29990064818412066, + "epoch": 0.05947131872111104, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17534621059894562, + "learning_rate": 1e-06, + "loss": 0.0128, + "num_tokens": 155456238.0, + "reward": 0.5234375, + "reward_std": 0.29847443103790283, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 349, + "tools/generated_tokens": 4311.78125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1355.2109375, + "completions/mean_terminated_length": 1252.690673828125, + "completions/min_length": 71.0, + "completions/min_terminated_length": 71.0, + "entropy": 0.2619368303567171, + "epoch": 0.05964172364581336, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.10621669888496399, + "learning_rate": 1e-06, + "loss": -0.0139, + "num_tokens": 155880852.0, + "reward": 0.4453125, + "reward_std": 0.14954319596290588, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 350, + "tools/generated_tokens": 4259.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1201.28125, + "completions/mean_terminated_length": 1105.5694580078125, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.2944045700132847, + "epoch": 0.05981212857051569, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1531706303358078, + "learning_rate": 1e-06, + "loss": 0.0225, + "num_tokens": 156275180.0, + "reward": 0.61328125, + "reward_std": 0.2757830023765564, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 351, + "tools/generated_tokens": 4137.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1337.43359375, + "completions/mean_terminated_length": 1224.905029296875, + "completions/min_length": 211.0, + "completions/min_terminated_length": 211.0, + "entropy": 0.26592031866312027, + "epoch": 0.05998253349521801, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14751036465168, + "learning_rate": 1e-06, + "loss": -0.009, + "num_tokens": 156692843.0, + "reward": 0.42578125, + "reward_std": 0.22358623147010803, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 352, + "tools/generated_tokens": 4361.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1292.46484375, + "completions/mean_terminated_length": 1085.726318359375, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.27806926518678665, + "epoch": 0.06015293841992034, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13071857392787933, + "learning_rate": 1e-06, + "loss": 0.0119, + "num_tokens": 157107282.0, + "reward": 0.328125, + "reward_std": 0.19673973321914673, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 353, + "tools/generated_tokens": 4452.47265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1312.4921875, + "completions/mean_terminated_length": 1184.2843017578125, + "completions/min_length": 199.0, + "completions/min_terminated_length": 199.0, + "entropy": 0.25763664301484823, + "epoch": 0.06032334334462266, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14546117186546326, + "learning_rate": 1e-06, + "loss": 0.0232, + "num_tokens": 157524192.0, + "reward": 0.515625, + "reward_std": 0.21455954015254974, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 354, + "tools/generated_tokens": 4216.5, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1212.2578125, + "completions/mean_terminated_length": 1101.318603515625, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.2963197957724333, + "epoch": 0.060493748269324986, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2035531848669052, + "learning_rate": 1e-06, + "loss": 0.0329, + "num_tokens": 157915042.0, + "reward": 0.51953125, + "reward_std": 0.30379754304885864, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 355, + "tools/generated_tokens": 4100.25, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1256.3203125, + "completions/mean_terminated_length": 1203.541748046875, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.2689771419391036, + "epoch": 0.06066415319402731, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1614867001771927, + "learning_rate": 1e-06, + "loss": 0.0176, + "num_tokens": 158314004.0, + "reward": 0.6015625, + "reward_std": 0.25890904664993286, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 356, + "tools/generated_tokens": 3888.33203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2007.0, + "completions/mean_length": 1229.04296875, + "completions/mean_terminated_length": 1128.4736328125, + "completions/min_length": 47.0, + "completions/min_terminated_length": 47.0, + "entropy": 0.300431115552783, + "epoch": 0.060834558118729634, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15670716762542725, + "learning_rate": 1e-06, + "loss": 0.0242, + "num_tokens": 158710735.0, + "reward": 0.51953125, + "reward_std": 0.19652670621871948, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 357, + "tools/generated_tokens": 4037.046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1245.8515625, + "completions/mean_terminated_length": 1166.6695556640625, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.2689073383808136, + "epoch": 0.061004963043431955, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17072314023971558, + "learning_rate": 1e-06, + "loss": 0.0399, + "num_tokens": 159103593.0, + "reward": 0.67578125, + "reward_std": 0.22358150780200958, + "rewards/simpleverify_reward/mean": 0.67578125, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 358, + "tools/generated_tokens": 3533.859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1285.4375, + "completions/mean_terminated_length": 1113.9521484375, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.29637575056403875, + "epoch": 0.06117536796813428, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16646058857440948, + "learning_rate": 1e-06, + "loss": -0.0002, + "num_tokens": 159513913.0, + "reward": 0.45703125, + "reward_std": 0.24538421630859375, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 359, + "tools/generated_tokens": 4637.4375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.63671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1215.859375, + "completions/mean_terminated_length": 1079.69091796875, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.2793931197375059, + "epoch": 0.0613457728928366, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15953166782855988, + "learning_rate": 1e-06, + "loss": 0.0144, + "num_tokens": 159898293.0, + "reward": 0.5, + "reward_std": 0.21438735723495483, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 360, + "tools/generated_tokens": 3711.859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.21875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1542.06640625, + "completions/mean_terminated_length": 1217.769287109375, + "completions/min_length": 273.0, + "completions/min_terminated_length": 273.0, + "entropy": 0.2827363107353449, + "epoch": 0.061516177817538924, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16723506152629852, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 160374646.0, + "reward": 0.30859375, + "reward_std": 0.161190003156662, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 361, + "tools/generated_tokens": 5526.1015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1998.0, + "completions/mean_length": 1189.5234375, + "completions/mean_terminated_length": 1092.478271484375, + "completions/min_length": 277.0, + "completions/min_terminated_length": 277.0, + "entropy": 0.27755394764244556, + "epoch": 0.06168658274224125, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20288144052028656, + "learning_rate": 1e-06, + "loss": -0.0119, + "num_tokens": 160754044.0, + "reward": 0.640625, + "reward_std": 0.23778341710567474, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 362, + "tools/generated_tokens": 3629.52734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1423.7109375, + "completions/mean_terminated_length": 1240.83837890625, + "completions/min_length": 366.0, + "completions/min_terminated_length": 366.0, + "entropy": 0.3219546005129814, + "epoch": 0.06185698766694357, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13976918160915375, + "learning_rate": 1e-06, + "loss": 0.0118, + "num_tokens": 161199090.0, + "reward": 0.47265625, + "reward_std": 0.19825831055641174, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 363, + "tools/generated_tokens": 4967.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1211.7109375, + "completions/mean_terminated_length": 1109.0087890625, + "completions/min_length": 45.0, + "completions/min_terminated_length": 45.0, + "entropy": 0.3026361558586359, + "epoch": 0.0620273925916459, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1752837598323822, + "learning_rate": 1e-06, + "loss": 0.0242, + "num_tokens": 161587336.0, + "reward": 0.56640625, + "reward_std": 0.26411134004592896, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 364, + "tools/generated_tokens": 4027.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1468.41796875, + "completions/mean_terminated_length": 1175.2235107421875, + "completions/min_length": 113.0, + "completions/min_terminated_length": 113.0, + "entropy": 0.3634508866816759, + "epoch": 0.06219779751634822, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14967897534370422, + "learning_rate": 1e-06, + "loss": 0.0301, + "num_tokens": 162057123.0, + "reward": 0.4453125, + "reward_std": 0.21996080875396729, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 365, + "tools/generated_tokens": 5540.421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.98828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1338.390625, + "completions/mean_terminated_length": 1166.1553955078125, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.32216991670429707, + "epoch": 0.06236820244105055, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1275116205215454, + "learning_rate": 1e-06, + "loss": 0.0251, + "num_tokens": 162492103.0, + "reward": 0.34765625, + "reward_std": 0.18408125638961792, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 366, + "tools/generated_tokens": 5074.39453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.82421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1334.4921875, + "completions/mean_terminated_length": 1156.990234375, + "completions/min_length": 193.0, + "completions/min_terminated_length": 193.0, + "entropy": 0.30862087197601795, + "epoch": 0.06253860736575287, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17597267031669617, + "learning_rate": 1e-06, + "loss": 0.0223, + "num_tokens": 162922981.0, + "reward": 0.43359375, + "reward_std": 0.31944963335990906, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 367, + "tools/generated_tokens": 4902.4921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1263.9453125, + "completions/mean_terminated_length": 1147.923828125, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.26257198210805655, + "epoch": 0.0627090122904552, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13663767278194427, + "learning_rate": 1e-06, + "loss": 0.0072, + "num_tokens": 163325607.0, + "reward": 0.515625, + "reward_std": 0.17686697840690613, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 368, + "tools/generated_tokens": 3903.94921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1409.3984375, + "completions/mean_terminated_length": 1276.867919921875, + "completions/min_length": 373.0, + "completions/min_terminated_length": 373.0, + "entropy": 0.28452976047992706, + "epoch": 0.06287941721515752, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.19804418087005615, + "learning_rate": 1e-06, + "loss": 0.0198, + "num_tokens": 163768877.0, + "reward": 0.578125, + "reward_std": 0.3485180139541626, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 369, + "tools/generated_tokens": 4833.41015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1232.08984375, + "completions/mean_terminated_length": 1119.675537109375, + "completions/min_length": 300.0, + "completions/min_terminated_length": 300.0, + "entropy": 0.2565639251843095, + "epoch": 0.06304982213985984, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1488446742296219, + "learning_rate": 1e-06, + "loss": 0.0223, + "num_tokens": 164175940.0, + "reward": 0.53125, + "reward_std": 0.24017895758152008, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 370, + "tools/generated_tokens": 4232.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1324.9765625, + "completions/mean_terminated_length": 1013.9552612304688, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.28396870102733374, + "epoch": 0.06322022706456217, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1640467494726181, + "learning_rate": 1e-06, + "loss": 0.0338, + "num_tokens": 164608078.0, + "reward": 0.40234375, + "reward_std": 0.23206059634685516, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 371, + "tools/generated_tokens": 5060.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.82421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1427.19140625, + "completions/mean_terminated_length": 1232.98974609375, + "completions/min_length": 113.0, + "completions/min_terminated_length": 113.0, + "entropy": 0.2992200646549463, + "epoch": 0.06339063198926449, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19355005025863647, + "learning_rate": 1e-06, + "loss": 0.0016, + "num_tokens": 165063887.0, + "reward": 0.36328125, + "reward_std": 0.31170564889907837, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 372, + "tools/generated_tokens": 5099.19140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.79296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1239.25390625, + "completions/mean_terminated_length": 1166.98291015625, + "completions/min_length": 57.0, + "completions/min_terminated_length": 57.0, + "entropy": 0.28434338979423046, + "epoch": 0.06356103691396682, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.17379023134708405, + "learning_rate": 1e-06, + "loss": 0.0155, + "num_tokens": 165460608.0, + "reward": 0.4609375, + "reward_std": 0.31952911615371704, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 373, + "tools/generated_tokens": 3975.26171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1408.8984375, + "completions/mean_terminated_length": 1168.3763427734375, + "completions/min_length": 167.0, + "completions/min_terminated_length": 167.0, + "entropy": 0.3091003466397524, + "epoch": 0.06373144183866913, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1238866001367569, + "learning_rate": 1e-06, + "loss": 0.0056, + "num_tokens": 165904230.0, + "reward": 0.47265625, + "reward_std": 0.1471242755651474, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 374, + "tools/generated_tokens": 4888.9140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1331.87890625, + "completions/mean_terminated_length": 1233.21337890625, + "completions/min_length": 203.0, + "completions/min_terminated_length": 203.0, + "entropy": 0.3320555854588747, + "epoch": 0.06390184676337146, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17904691398143768, + "learning_rate": 1e-06, + "loss": 0.0066, + "num_tokens": 166336199.0, + "reward": 0.421875, + "reward_std": 0.3037048578262329, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 375, + "tools/generated_tokens": 5179.89453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1199.71875, + "completions/mean_terminated_length": 1087.114990234375, + "completions/min_length": 125.0, + "completions/min_terminated_length": 125.0, + "entropy": 0.26301499642431736, + "epoch": 0.06407225168807379, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14704641699790955, + "learning_rate": 1e-06, + "loss": -0.0169, + "num_tokens": 166730767.0, + "reward": 0.5234375, + "reward_std": 0.22964167594909668, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 376, + "tools/generated_tokens": 4127.72265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1252.3125, + "completions/mean_terminated_length": 1192.134521484375, + "completions/min_length": 252.0, + "completions/min_terminated_length": 252.0, + "entropy": 0.27136948611587286, + "epoch": 0.0642426566127761, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18955622613430023, + "learning_rate": 1e-06, + "loss": 0.0065, + "num_tokens": 167125375.0, + "reward": 0.73046875, + "reward_std": 0.22005823254585266, + "rewards/simpleverify_reward/mean": 0.73046875, + "rewards/simpleverify_reward/std": 0.44458550214767456, + "step": 377, + "tools/generated_tokens": 3732.31640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1210.23828125, + "completions/mean_terminated_length": 1127.5450439453125, + "completions/min_length": 299.0, + "completions/min_terminated_length": 299.0, + "entropy": 0.2912600552663207, + "epoch": 0.06441306153747843, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15348312258720398, + "learning_rate": 1e-06, + "loss": 0.028, + "num_tokens": 167525692.0, + "reward": 0.53515625, + "reward_std": 0.2748759984970093, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 378, + "tools/generated_tokens": 4418.2421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1383.22265625, + "completions/mean_terminated_length": 1166.2279052734375, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.2929877061396837, + "epoch": 0.06458346646218076, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1400548219680786, + "learning_rate": 1e-06, + "loss": 0.0432, + "num_tokens": 167961989.0, + "reward": 0.4765625, + "reward_std": 0.25640395283699036, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 379, + "tools/generated_tokens": 4695.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1380.73828125, + "completions/mean_terminated_length": 1210.6519775390625, + "completions/min_length": 162.0, + "completions/min_terminated_length": 162.0, + "entropy": 0.2998197767883539, + "epoch": 0.06475387138688309, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18942537903785706, + "learning_rate": 1e-06, + "loss": 0.015, + "num_tokens": 168400466.0, + "reward": 0.3515625, + "reward_std": 0.2543018162250519, + "rewards/simpleverify_reward/mean": 0.3515625, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 380, + "tools/generated_tokens": 4604.75, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1515.8515625, + "completions/mean_terminated_length": 1246.6529541015625, + "completions/min_length": 212.0, + "completions/min_terminated_length": 212.0, + "entropy": 0.2988246390596032, + "epoch": 0.0649242763115854, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12259532511234283, + "learning_rate": 1e-06, + "loss": -0.0066, + "num_tokens": 168874316.0, + "reward": 0.25, + "reward_std": 0.17693254351615906, + "rewards/simpleverify_reward/mean": 0.25, + "rewards/simpleverify_reward/std": 0.4338609278202057, + "step": 381, + "tools/generated_tokens": 5523.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1436.56640625, + "completions/mean_terminated_length": 1261.43212890625, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.2791782543063164, + "epoch": 0.06509468123628773, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1588088721036911, + "learning_rate": 1e-06, + "loss": 0.0342, + "num_tokens": 169317021.0, + "reward": 0.36328125, + "reward_std": 0.26489412784576416, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 382, + "tools/generated_tokens": 4196.578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1234.421875, + "completions/mean_terminated_length": 1142.4521484375, + "completions/min_length": 146.0, + "completions/min_terminated_length": 146.0, + "entropy": 0.2746760230511427, + "epoch": 0.06526508616099005, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16601525247097015, + "learning_rate": 1e-06, + "loss": 0.0063, + "num_tokens": 169703497.0, + "reward": 0.6171875, + "reward_std": 0.2743987441062927, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 383, + "tools/generated_tokens": 3290.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.00390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1408.7109375, + "completions/mean_terminated_length": 1148.7802734375, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.3064160402864218, + "epoch": 0.06543549108569238, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1455686241388321, + "learning_rate": 1e-06, + "loss": 0.0308, + "num_tokens": 170147087.0, + "reward": 0.3359375, + "reward_std": 0.18353557586669922, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 384, + "tools/generated_tokens": 4928.7265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1340.63671875, + "completions/mean_terminated_length": 1257.23583984375, + "completions/min_length": 365.0, + "completions/min_terminated_length": 365.0, + "entropy": 0.28463104739785194, + "epoch": 0.0656058960103947, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.1766914278268814, + "learning_rate": 1e-06, + "loss": -0.0055, + "num_tokens": 170572642.0, + "reward": 0.5390625, + "reward_std": 0.29113906621932983, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 385, + "tools/generated_tokens": 3908.6484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1337.703125, + "completions/mean_terminated_length": 1270.9273681640625, + "completions/min_length": 178.0, + "completions/min_terminated_length": 178.0, + "entropy": 0.32302073016762733, + "epoch": 0.06577630093509702, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15863491594791412, + "learning_rate": 1e-06, + "loss": -0.0072, + "num_tokens": 170997558.0, + "reward": 0.5546875, + "reward_std": 0.2906888723373413, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 386, + "tools/generated_tokens": 4369.703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1257.71484375, + "completions/mean_terminated_length": 1175.9654541015625, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.2963530384004116, + "epoch": 0.06594670585979935, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19245870411396027, + "learning_rate": 1e-06, + "loss": 0.0427, + "num_tokens": 171408445.0, + "reward": 0.4453125, + "reward_std": 0.23488396406173706, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 387, + "tools/generated_tokens": 4385.71875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.52734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1320.796875, + "completions/mean_terminated_length": 1197.93603515625, + "completions/min_length": 274.0, + "completions/min_terminated_length": 274.0, + "entropy": 0.2761593796312809, + "epoch": 0.06611711078450168, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.3170912563800812, + "learning_rate": 1e-06, + "loss": 0.0046, + "num_tokens": 171829161.0, + "reward": 0.5, + "reward_std": 0.215584397315979, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 388, + "tools/generated_tokens": 4424.8203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1341.34375, + "completions/mean_terminated_length": 1225.713623046875, + "completions/min_length": 220.0, + "completions/min_terminated_length": 220.0, + "entropy": 0.27688918076455593, + "epoch": 0.06628751570920399, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16212299466133118, + "learning_rate": 1e-06, + "loss": -0.0063, + "num_tokens": 172251361.0, + "reward": 0.4921875, + "reward_std": 0.24551981687545776, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 389, + "tools/generated_tokens": 4253.3984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1220.5625, + "completions/mean_terminated_length": 1085.1680908203125, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.3241068311035633, + "epoch": 0.06645792063390632, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18372248113155365, + "learning_rate": 1e-06, + "loss": 0.0169, + "num_tokens": 172645409.0, + "reward": 0.609375, + "reward_std": 0.1969657838344574, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 390, + "tools/generated_tokens": 4324.5703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2003.0, + "completions/mean_length": 1337.53125, + "completions/mean_terminated_length": 1152.0443115234375, + "completions/min_length": 196.0, + "completions/min_terminated_length": 196.0, + "entropy": 0.3025492988526821, + "epoch": 0.06662832555860865, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15253105759620667, + "learning_rate": 1e-06, + "loss": 0.0212, + "num_tokens": 173075817.0, + "reward": 0.44921875, + "reward_std": 0.29400384426116943, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 391, + "tools/generated_tokens": 5105.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.83984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1306.34765625, + "completions/mean_terminated_length": 1143.8905029296875, + "completions/min_length": 268.0, + "completions/min_terminated_length": 268.0, + "entropy": 0.2915899492800236, + "epoch": 0.06679873048331096, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17179858684539795, + "learning_rate": 1e-06, + "loss": 0.0369, + "num_tokens": 173500722.0, + "reward": 0.42578125, + "reward_std": 0.28256726264953613, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 392, + "tools/generated_tokens": 4706.25390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1271.87890625, + "completions/mean_terminated_length": 1180.3756103515625, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.31104396283626556, + "epoch": 0.06696913540801329, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16390515863895416, + "learning_rate": 1e-06, + "loss": -0.009, + "num_tokens": 173905283.0, + "reward": 0.35546875, + "reward_std": 0.25825291872024536, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 393, + "tools/generated_tokens": 3895.8984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1383.32421875, + "completions/mean_terminated_length": 1249.1455078125, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.2621934078633785, + "epoch": 0.06713954033271562, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1332361251115799, + "learning_rate": 1e-06, + "loss": -0.0255, + "num_tokens": 174330486.0, + "reward": 0.57421875, + "reward_std": 0.21093884110450745, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 394, + "tools/generated_tokens": 4095.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.32421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1356.14453125, + "completions/mean_terminated_length": 1148.944091796875, + "completions/min_length": 171.0, + "completions/min_terminated_length": 171.0, + "entropy": 0.29124689288437366, + "epoch": 0.06730994525741794, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.4636945426464081, + "learning_rate": 1e-06, + "loss": 0.0261, + "num_tokens": 174771403.0, + "reward": 0.30859375, + "reward_std": 0.29354849457740784, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 395, + "tools/generated_tokens": 5324.171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1178.43359375, + "completions/mean_terminated_length": 1120.4625244140625, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.2763081593438983, + "epoch": 0.06748035018212026, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16088153421878815, + "learning_rate": 1e-06, + "loss": -0.0123, + "num_tokens": 175163130.0, + "reward": 0.6328125, + "reward_std": 0.2552450895309448, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 396, + "tools/generated_tokens": 4002.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1222.60546875, + "completions/mean_terminated_length": 1160.1807861328125, + "completions/min_length": 111.0, + "completions/min_terminated_length": 111.0, + "entropy": 0.28180971182882786, + "epoch": 0.06765075510682259, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15223020315170288, + "learning_rate": 1e-06, + "loss": 0.0103, + "num_tokens": 175548981.0, + "reward": 0.38671875, + "reward_std": 0.21840627491474152, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 397, + "tools/generated_tokens": 3750.61328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1197.59375, + "completions/mean_terminated_length": 1121.5999755859375, + "completions/min_length": 235.0, + "completions/min_terminated_length": 235.0, + "entropy": 0.22976437583565712, + "epoch": 0.06782116003152491, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16082558035850525, + "learning_rate": 1e-06, + "loss": -0.0028, + "num_tokens": 175921229.0, + "reward": 0.51953125, + "reward_std": 0.17781277000904083, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 398, + "tools/generated_tokens": 3109.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.93359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1363.28515625, + "completions/mean_terminated_length": 1125.4368896484375, + "completions/min_length": 216.0, + "completions/min_terminated_length": 216.0, + "entropy": 0.2488960139453411, + "epoch": 0.06799156495622724, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16089093685150146, + "learning_rate": 1e-06, + "loss": 0.0171, + "num_tokens": 176348262.0, + "reward": 0.46875, + "reward_std": 0.2441575825214386, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 399, + "tools/generated_tokens": 4699.2890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1985.0, + "completions/mean_length": 1397.7265625, + "completions/mean_terminated_length": 1138.327880859375, + "completions/min_length": 202.0, + "completions/min_terminated_length": 202.0, + "entropy": 0.277536628767848, + "epoch": 0.06816196988092955, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1606374830007553, + "learning_rate": 1e-06, + "loss": 0.0506, + "num_tokens": 176792672.0, + "reward": 0.41015625, + "reward_std": 0.29895496368408203, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 400, + "tools/generated_tokens": 4837.7265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.03515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1170.4453125, + "completions/mean_terminated_length": 1138.4696044921875, + "completions/min_length": 201.0, + "completions/min_terminated_length": 201.0, + "entropy": 0.33787195198237896, + "epoch": 0.06833237480563188, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18944306671619415, + "learning_rate": 1e-06, + "loss": 0.0022, + "num_tokens": 177169746.0, + "reward": 0.4921875, + "reward_std": 0.3032139539718628, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 401, + "tools/generated_tokens": 3754.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.26171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1225.8828125, + "completions/mean_terminated_length": 1124.9210205078125, + "completions/min_length": 252.0, + "completions/min_terminated_length": 252.0, + "entropy": 0.31717364117503166, + "epoch": 0.06850277973033421, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18579499423503876, + "learning_rate": 1e-06, + "loss": 0.0167, + "num_tokens": 177569028.0, + "reward": 0.51953125, + "reward_std": 0.1944383680820465, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 402, + "tools/generated_tokens": 4225.8984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1217.55078125, + "completions/mean_terminated_length": 1063.763916015625, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.2915894640609622, + "epoch": 0.06867318465503654, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19436435401439667, + "learning_rate": 1e-06, + "loss": 0.0373, + "num_tokens": 177950817.0, + "reward": 0.6171875, + "reward_std": 0.281505823135376, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 403, + "tools/generated_tokens": 3849.55859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1212.453125, + "completions/mean_terminated_length": 1048.471923828125, + "completions/min_length": 193.0, + "completions/min_terminated_length": 193.0, + "entropy": 0.2983880825340748, + "epoch": 0.06884358957973885, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16463236510753632, + "learning_rate": 1e-06, + "loss": -0.0108, + "num_tokens": 178344005.0, + "reward": 0.453125, + "reward_std": 0.21348227560520172, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 404, + "tools/generated_tokens": 4068.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1428.3671875, + "completions/mean_terminated_length": 1226.1036376953125, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.3584884200245142, + "epoch": 0.06901399450444118, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1689015030860901, + "learning_rate": 1e-06, + "loss": 0.0124, + "num_tokens": 178790611.0, + "reward": 0.3046875, + "reward_std": 0.20624089241027832, + "rewards/simpleverify_reward/mean": 0.3046875, + "rewards/simpleverify_reward/std": 0.4611765742301941, + "step": 405, + "tools/generated_tokens": 4924.375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.70703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1483.45703125, + "completions/mean_terminated_length": 1283.3280029296875, + "completions/min_length": 248.0, + "completions/min_terminated_length": 248.0, + "entropy": 0.26528845727443695, + "epoch": 0.0691843994291435, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1359315812587738, + "learning_rate": 1e-06, + "loss": 0.0356, + "num_tokens": 179252552.0, + "reward": 0.30078125, + "reward_std": 0.19966495037078857, + "rewards/simpleverify_reward/mean": 0.30078125, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 406, + "tools/generated_tokens": 5083.46484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1210.6484375, + "completions/mean_terminated_length": 1127.991455078125, + "completions/min_length": 186.0, + "completions/min_terminated_length": 186.0, + "entropy": 0.2559625366702676, + "epoch": 0.06935480435384582, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15389494597911835, + "learning_rate": 1e-06, + "loss": 0.0007, + "num_tokens": 179638510.0, + "reward": 0.59375, + "reward_std": 0.2542886435985565, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 407, + "tools/generated_tokens": 3730.65625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.23046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1268.90234375, + "completions/mean_terminated_length": 1124.625, + "completions/min_length": 327.0, + "completions/min_terminated_length": 327.0, + "entropy": 0.2988923639059067, + "epoch": 0.06952520927854815, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17689840495586395, + "learning_rate": 1e-06, + "loss": -0.0181, + "num_tokens": 180049253.0, + "reward": 0.47265625, + "reward_std": 0.2333115190267563, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 408, + "tools/generated_tokens": 4300.91796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1225.34765625, + "completions/mean_terminated_length": 1112.0045166015625, + "completions/min_length": 222.0, + "completions/min_terminated_length": 222.0, + "entropy": 0.2627219529822469, + "epoch": 0.06969561420325047, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16810616850852966, + "learning_rate": 1e-06, + "loss": 0.0083, + "num_tokens": 180438814.0, + "reward": 0.5625, + "reward_std": 0.21519789099693298, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 409, + "tools/generated_tokens": 3905.359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1233.49609375, + "completions/mean_terminated_length": 1137.462890625, + "completions/min_length": 158.0, + "completions/min_terminated_length": 158.0, + "entropy": 0.2939452510327101, + "epoch": 0.0698660191279528, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17348815500736237, + "learning_rate": 1e-06, + "loss": 0.0032, + "num_tokens": 180828685.0, + "reward": 0.54296875, + "reward_std": 0.26400476694107056, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 410, + "tools/generated_tokens": 3881.49609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1327.0078125, + "completions/mean_terminated_length": 1181.4554443359375, + "completions/min_length": 166.0, + "completions/min_terminated_length": 166.0, + "entropy": 0.23369611985981464, + "epoch": 0.07003642405265512, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.11177127063274384, + "learning_rate": 1e-06, + "loss": -0.0013, + "num_tokens": 181241343.0, + "reward": 0.37109375, + "reward_std": 0.12806200981140137, + "rewards/simpleverify_reward/mean": 0.37109375, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 411, + "tools/generated_tokens": 4359.03125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1251.8046875, + "completions/mean_terminated_length": 1108.709716796875, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.27532170712947845, + "epoch": 0.07020682897735744, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2053053230047226, + "learning_rate": 1e-06, + "loss": -0.0021, + "num_tokens": 181650989.0, + "reward": 0.3671875, + "reward_std": 0.26396670937538147, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 412, + "tools/generated_tokens": 3947.80859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1393.99609375, + "completions/mean_terminated_length": 1193.790771484375, + "completions/min_length": 105.0, + "completions/min_terminated_length": 105.0, + "entropy": 0.30840983986854553, + "epoch": 0.07037723390205977, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12739481031894684, + "learning_rate": 1e-06, + "loss": 0.0068, + "num_tokens": 182089356.0, + "reward": 0.42578125, + "reward_std": 0.12709102034568787, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 413, + "tools/generated_tokens": 4786.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1261.96484375, + "completions/mean_terminated_length": 1149.6741943359375, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.28878416679799557, + "epoch": 0.0705476388267621, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17790941894054413, + "learning_rate": 1e-06, + "loss": 0.0202, + "num_tokens": 182495619.0, + "reward": 0.59765625, + "reward_std": 0.23958192765712738, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 414, + "tools/generated_tokens": 4205.96484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1220.296875, + "completions/mean_terminated_length": 1161.422607421875, + "completions/min_length": 161.0, + "completions/min_terminated_length": 161.0, + "entropy": 0.25535366870462894, + "epoch": 0.07071804375146441, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18710988759994507, + "learning_rate": 1e-06, + "loss": -0.0056, + "num_tokens": 182890479.0, + "reward": 0.56640625, + "reward_std": 0.3078889548778534, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 415, + "tools/generated_tokens": 4036.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1410.53125, + "completions/mean_terminated_length": 1219.6141357421875, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.2561458731070161, + "epoch": 0.07088844867616674, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.09066380560398102, + "learning_rate": 1e-06, + "loss": -0.0188, + "num_tokens": 183313159.0, + "reward": 0.31640625, + "reward_std": 0.10341504216194153, + "rewards/simpleverify_reward/mean": 0.31640625, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 416, + "tools/generated_tokens": 3962.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2017.0, + "completions/mean_length": 1213.296875, + "completions/mean_terminated_length": 1142.559326171875, + "completions/min_length": 199.0, + "completions/min_terminated_length": 199.0, + "entropy": 0.2706059282645583, + "epoch": 0.07105885360086907, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16542141139507294, + "learning_rate": 1e-06, + "loss": 0.0148, + "num_tokens": 183698467.0, + "reward": 0.484375, + "reward_std": 0.2505345940589905, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 417, + "tools/generated_tokens": 3821.30859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1220.109375, + "completions/mean_terminated_length": 1080.2374267578125, + "completions/min_length": 126.0, + "completions/min_terminated_length": 126.0, + "entropy": 0.2748273015022278, + "epoch": 0.0712292585255714, + "frac_reward_zero_std": 0.0625, + "grad_norm": 0.2127537876367569, + "learning_rate": 1e-06, + "loss": 0.0378, + "num_tokens": 184094063.0, + "reward": 0.4296875, + "reward_std": 0.3741224706172943, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 418, + "tools/generated_tokens": 4364.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1334.8203125, + "completions/mean_terminated_length": 1206.6451416015625, + "completions/min_length": 148.0, + "completions/min_terminated_length": 148.0, + "entropy": 0.3021557554602623, + "epoch": 0.07139966345027371, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17256174981594086, + "learning_rate": 1e-06, + "loss": 0.0096, + "num_tokens": 184523761.0, + "reward": 0.515625, + "reward_std": 0.30045706033706665, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 419, + "tools/generated_tokens": 4462.8203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.52734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1204.37890625, + "completions/mean_terminated_length": 1100.7763671875, + "completions/min_length": 196.0, + "completions/min_terminated_length": 196.0, + "entropy": 0.3174930810928345, + "epoch": 0.07157006837497604, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1809980571269989, + "learning_rate": 1e-06, + "loss": -0.0065, + "num_tokens": 184916194.0, + "reward": 0.453125, + "reward_std": 0.2715497612953186, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 420, + "tools/generated_tokens": 4492.3984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1234.1796875, + "completions/mean_terminated_length": 1187.09912109375, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.2462693229317665, + "epoch": 0.07174047329967836, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13745740056037903, + "learning_rate": 1e-06, + "loss": 0.025, + "num_tokens": 185301792.0, + "reward": 0.5859375, + "reward_std": 0.24063712358474731, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 421, + "tools/generated_tokens": 3666.19140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1472.109375, + "completions/mean_terminated_length": 1255.3763427734375, + "completions/min_length": 222.0, + "completions/min_terminated_length": 222.0, + "entropy": 0.298484243452549, + "epoch": 0.07191087822438068, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14522314071655273, + "learning_rate": 1e-06, + "loss": -0.0077, + "num_tokens": 185750588.0, + "reward": 0.36328125, + "reward_std": 0.16516819596290588, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 422, + "tools/generated_tokens": 4536.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1276.61328125, + "completions/mean_terminated_length": 1185.663818359375, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "entropy": 0.28468674700707197, + "epoch": 0.072081283149083, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2937934994697571, + "learning_rate": 1e-06, + "loss": 0.021, + "num_tokens": 186154649.0, + "reward": 0.52734375, + "reward_std": 0.25124263763427734, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 423, + "tools/generated_tokens": 3772.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.21875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1099.46875, + "completions/mean_terminated_length": 1076.7041015625, + "completions/min_length": 173.0, + "completions/min_terminated_length": 173.0, + "entropy": 0.26107916329056025, + "epoch": 0.07225168807378533, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19994470477104187, + "learning_rate": 1e-06, + "loss": 0.0205, + "num_tokens": 186515265.0, + "reward": 0.56640625, + "reward_std": 0.32424497604370117, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 424, + "tools/generated_tokens": 3523.4765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.18359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1287.51953125, + "completions/mean_terminated_length": 1125.331787109375, + "completions/min_length": 116.0, + "completions/min_terminated_length": 116.0, + "entropy": 0.25633655954152346, + "epoch": 0.07242209299848766, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14257393777370453, + "learning_rate": 1e-06, + "loss": -0.0137, + "num_tokens": 186934326.0, + "reward": 0.53515625, + "reward_std": 0.2603171467781067, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 425, + "tools/generated_tokens": 4455.51953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1180.10546875, + "completions/mean_terminated_length": 1098.508544921875, + "completions/min_length": 243.0, + "completions/min_terminated_length": 243.0, + "entropy": 0.2560670170933008, + "epoch": 0.07259249792318997, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19285213947296143, + "learning_rate": 1e-06, + "loss": -0.0027, + "num_tokens": 187311873.0, + "reward": 0.51953125, + "reward_std": 0.20786382257938385, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 426, + "tools/generated_tokens": 3868.11328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1213.4921875, + "completions/mean_terminated_length": 1165.21484375, + "completions/min_length": 277.0, + "completions/min_terminated_length": 277.0, + "entropy": 0.28291317261755466, + "epoch": 0.0727629028478923, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15356355905532837, + "learning_rate": 1e-06, + "loss": 0.0003, + "num_tokens": 187701711.0, + "reward": 0.48046875, + "reward_std": 0.2604663670063019, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 427, + "tools/generated_tokens": 3693.5, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1270.1953125, + "completions/mean_terminated_length": 1142.9227294921875, + "completions/min_length": 120.0, + "completions/min_terminated_length": 120.0, + "entropy": 0.2575987661257386, + "epoch": 0.07293330777259463, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17822514474391937, + "learning_rate": 1e-06, + "loss": 0.0224, + "num_tokens": 188112161.0, + "reward": 0.40234375, + "reward_std": 0.30202803015708923, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 428, + "tools/generated_tokens": 4302.203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1259.6640625, + "completions/mean_terminated_length": 1162.850830078125, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.27724962681531906, + "epoch": 0.07310371269729696, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19279025495052338, + "learning_rate": 1e-06, + "loss": 0.0099, + "num_tokens": 188509275.0, + "reward": 0.4296875, + "reward_std": 0.18181806802749634, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 429, + "tools/generated_tokens": 3979.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1226.28125, + "completions/mean_terminated_length": 1171.5042724609375, + "completions/min_length": 38.0, + "completions/min_terminated_length": 38.0, + "entropy": 0.23214791808277369, + "epoch": 0.07327411762199927, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14280451834201813, + "learning_rate": 1e-06, + "loss": 0.0115, + "num_tokens": 188888307.0, + "reward": 0.62109375, + "reward_std": 0.2399258315563202, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 430, + "tools/generated_tokens": 3314.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.01953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1379.44140625, + "completions/mean_terminated_length": 1233.0, + "completions/min_length": 233.0, + "completions/min_terminated_length": 233.0, + "entropy": 0.254175859503448, + "epoch": 0.0734445225467016, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.11756816506385803, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 189320612.0, + "reward": 0.37890625, + "reward_std": 0.14326362311840057, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 431, + "tools/generated_tokens": 4315.44921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1216.16015625, + "completions/mean_terminated_length": 1141.825439453125, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.24923141486942768, + "epoch": 0.07361492747140393, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.22574414312839508, + "learning_rate": 1e-06, + "loss": 0.0092, + "num_tokens": 189717629.0, + "reward": 0.578125, + "reward_std": 0.30700400471687317, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 432, + "tools/generated_tokens": 4016.19140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1232.28515625, + "completions/mean_terminated_length": 1094.47021484375, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.29618942365050316, + "epoch": 0.07378533239610625, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15020115673542023, + "learning_rate": 1e-06, + "loss": 0.0089, + "num_tokens": 190117366.0, + "reward": 0.48046875, + "reward_std": 0.2005864679813385, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 433, + "tools/generated_tokens": 4368.296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1123.3359375, + "completions/mean_terminated_length": 1069.8470458984375, + "completions/min_length": 248.0, + "completions/min_terminated_length": 248.0, + "entropy": 0.32763964496552944, + "epoch": 0.07395573732080857, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17810696363449097, + "learning_rate": 1e-06, + "loss": -0.0048, + "num_tokens": 190482876.0, + "reward": 0.5, + "reward_std": 0.2815170884132385, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 434, + "tools/generated_tokens": 3739.34765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.27734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1209.07421875, + "completions/mean_terminated_length": 1053.7176513671875, + "completions/min_length": 139.0, + "completions/min_terminated_length": 139.0, + "entropy": 0.2867574654519558, + "epoch": 0.0741261422455109, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15940769016742706, + "learning_rate": 1e-06, + "loss": 0.0198, + "num_tokens": 190873311.0, + "reward": 0.47265625, + "reward_std": 0.19721892476081848, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 435, + "tools/generated_tokens": 4113.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1183.37890625, + "completions/mean_terminated_length": 1089.80517578125, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.30609723739326, + "epoch": 0.07429654717021322, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15700900554656982, + "learning_rate": 1e-06, + "loss": 0.0043, + "num_tokens": 191261760.0, + "reward": 0.5390625, + "reward_std": 0.19366663694381714, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 436, + "tools/generated_tokens": 3959.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1198.43359375, + "completions/mean_terminated_length": 1152.9835205078125, + "completions/min_length": 201.0, + "completions/min_terminated_length": 201.0, + "entropy": 0.2891850499436259, + "epoch": 0.07446695209491554, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18941472470760345, + "learning_rate": 1e-06, + "loss": 0.0214, + "num_tokens": 191650191.0, + "reward": 0.5390625, + "reward_std": 0.23095625638961792, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 437, + "tools/generated_tokens": 4054.4375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1216.796875, + "completions/mean_terminated_length": 1153.932861328125, + "completions/min_length": 69.0, + "completions/min_terminated_length": 69.0, + "entropy": 0.2855268847197294, + "epoch": 0.07463735701961786, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1926969736814499, + "learning_rate": 1e-06, + "loss": 0.009, + "num_tokens": 192038699.0, + "reward": 0.671875, + "reward_std": 0.2702304720878601, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 438, + "tools/generated_tokens": 3680.80859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1318.4296875, + "completions/mean_terminated_length": 1171.1455078125, + "completions/min_length": 117.0, + "completions/min_terminated_length": 117.0, + "entropy": 0.2969972314313054, + "epoch": 0.07480776194432019, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17637549340724945, + "learning_rate": 1e-06, + "loss": 0.0505, + "num_tokens": 192461145.0, + "reward": 0.56640625, + "reward_std": 0.30230605602264404, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 439, + "tools/generated_tokens": 4486.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1308.47265625, + "completions/mean_terminated_length": 1199.035888671875, + "completions/min_length": 190.0, + "completions/min_terminated_length": 190.0, + "entropy": 0.2886236198246479, + "epoch": 0.07497816686902252, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2587954103946686, + "learning_rate": 1e-06, + "loss": 0.0174, + "num_tokens": 192875586.0, + "reward": 0.4296875, + "reward_std": 0.24933947622776031, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 440, + "tools/generated_tokens": 4124.48828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1242.453125, + "completions/mean_terminated_length": 1195.8553466796875, + "completions/min_length": 185.0, + "completions/min_terminated_length": 185.0, + "entropy": 0.25153734255582094, + "epoch": 0.07514857179372483, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19918447732925415, + "learning_rate": 1e-06, + "loss": 0.0287, + "num_tokens": 193265318.0, + "reward": 0.56640625, + "reward_std": 0.2962125539779663, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 441, + "tools/generated_tokens": 3346.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.02734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1180.1328125, + "completions/mean_terminated_length": 1028.8531494140625, + "completions/min_length": 57.0, + "completions/min_terminated_length": 57.0, + "entropy": 0.25766815803945065, + "epoch": 0.07531897671842716, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11928673088550568, + "learning_rate": 1e-06, + "loss": -0.0131, + "num_tokens": 193647912.0, + "reward": 0.4921875, + "reward_std": 0.1331464648246765, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 442, + "tools/generated_tokens": 3788.14453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1153.38671875, + "completions/mean_terminated_length": 1081.6666259765625, + "completions/min_length": 107.0, + "completions/min_terminated_length": 107.0, + "entropy": 0.26415857393294573, + "epoch": 0.07548938164312949, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16005739569664001, + "learning_rate": 1e-06, + "loss": 0.0065, + "num_tokens": 194023099.0, + "reward": 0.46875, + "reward_std": 0.19588851928710938, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 443, + "tools/generated_tokens": 3849.41015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1388.5625, + "completions/mean_terminated_length": 1199.683349609375, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.2570499451830983, + "epoch": 0.07565978656783182, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18124040961265564, + "learning_rate": 1e-06, + "loss": 0.0588, + "num_tokens": 194464571.0, + "reward": 0.46484375, + "reward_std": 0.36255943775177, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 444, + "tools/generated_tokens": 4996.56640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1201.9765625, + "completions/mean_terminated_length": 1076.7802734375, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.28643600922077894, + "epoch": 0.07583019149253413, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.20645000040531158, + "learning_rate": 1e-06, + "loss": 0.0179, + "num_tokens": 194853605.0, + "reward": 0.421875, + "reward_std": 0.21730193495750427, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 445, + "tools/generated_tokens": 4297.9921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1238.63671875, + "completions/mean_terminated_length": 1181.06689453125, + "completions/min_length": 236.0, + "completions/min_terminated_length": 236.0, + "entropy": 0.2869391664862633, + "epoch": 0.07600059641723646, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.21641193330287933, + "learning_rate": 1e-06, + "loss": 0.0019, + "num_tokens": 195259576.0, + "reward": 0.4609375, + "reward_std": 0.35176295042037964, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 446, + "tools/generated_tokens": 4310.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.04296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1249.00390625, + "completions/mean_terminated_length": 1213.1346435546875, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.2864510640501976, + "epoch": 0.07617100134193878, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1885346919298172, + "learning_rate": 1e-06, + "loss": 0.0213, + "num_tokens": 195659401.0, + "reward": 0.71875, + "reward_std": 0.31807005405426025, + "rewards/simpleverify_reward/mean": 0.71875, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 447, + "tools/generated_tokens": 3633.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1208.94921875, + "completions/mean_terminated_length": 1130.064208984375, + "completions/min_length": 275.0, + "completions/min_terminated_length": 275.0, + "entropy": 0.267677903175354, + "epoch": 0.07634140626664111, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1536675989627838, + "learning_rate": 1e-06, + "loss": 0.0175, + "num_tokens": 196054684.0, + "reward": 0.62890625, + "reward_std": 0.24845923483371735, + "rewards/simpleverify_reward/mean": 0.62890625, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 448, + "tools/generated_tokens": 4080.9453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.04296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1090.375, + "completions/mean_terminated_length": 1047.3795166015625, + "completions/min_length": 99.0, + "completions/min_terminated_length": 99.0, + "entropy": 0.28707336355000734, + "epoch": 0.07651181119134343, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2251126617193222, + "learning_rate": 1e-06, + "loss": -0.0075, + "num_tokens": 196415692.0, + "reward": 0.66015625, + "reward_std": 0.2708975076675415, + "rewards/simpleverify_reward/mean": 0.66015625, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 449, + "tools/generated_tokens": 3658.4140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1356.484375, + "completions/mean_terminated_length": 1216.8826904296875, + "completions/min_length": 210.0, + "completions/min_terminated_length": 210.0, + "entropy": 0.3043972812592983, + "epoch": 0.07668221611604575, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17533689737319946, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 196856664.0, + "reward": 0.359375, + "reward_std": 0.26049065589904785, + "rewards/simpleverify_reward/mean": 0.359375, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 450, + "tools/generated_tokens": 4900.48046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1219.20703125, + "completions/mean_terminated_length": 1087.954833984375, + "completions/min_length": 116.0, + "completions/min_terminated_length": 116.0, + "entropy": 0.2809931878000498, + "epoch": 0.07685262104074808, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16633576154708862, + "learning_rate": 1e-06, + "loss": 0.0253, + "num_tokens": 197253501.0, + "reward": 0.42578125, + "reward_std": 0.17023906111717224, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 451, + "tools/generated_tokens": 4251.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1251.37890625, + "completions/mean_terminated_length": 1086.04248046875, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.24770265072584152, + "epoch": 0.0770230259654504, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17578484117984772, + "learning_rate": 1e-06, + "loss": 0.0258, + "num_tokens": 197647518.0, + "reward": 0.609375, + "reward_std": 0.2597846984863281, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 452, + "tools/generated_tokens": 4115.3828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2011.0, + "completions/mean_length": 1456.3125, + "completions/mean_terminated_length": 1259.088623046875, + "completions/min_length": 353.0, + "completions/min_terminated_length": 353.0, + "entropy": 0.2675662850961089, + "epoch": 0.07719343089015272, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11961816996335983, + "learning_rate": 1e-06, + "loss": 0.0033, + "num_tokens": 198102958.0, + "reward": 0.46875, + "reward_std": 0.17978152632713318, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 453, + "tools/generated_tokens": 4840.328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1183.78125, + "completions/mean_terminated_length": 1118.420166015625, + "completions/min_length": 287.0, + "completions/min_terminated_length": 287.0, + "entropy": 0.27085812017321587, + "epoch": 0.07736383581485505, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1622786670923233, + "learning_rate": 1e-06, + "loss": 0.0143, + "num_tokens": 198496246.0, + "reward": 0.3828125, + "reward_std": 0.29786184430122375, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 454, + "tools/generated_tokens": 3959.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1303.62109375, + "completions/mean_terminated_length": 1201.062255859375, + "completions/min_length": 206.0, + "completions/min_terminated_length": 206.0, + "entropy": 0.2089649671688676, + "epoch": 0.07753424073955738, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16132892668247223, + "learning_rate": 1e-06, + "loss": -0.0024, + "num_tokens": 198909637.0, + "reward": 0.4921875, + "reward_std": 0.2361333966255188, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 455, + "tools/generated_tokens": 4207.63671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1254.03125, + "completions/mean_terminated_length": 1144.6400146484375, + "completions/min_length": 120.0, + "completions/min_terminated_length": 120.0, + "entropy": 0.32212772220373154, + "epoch": 0.07770464566425969, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19194775819778442, + "learning_rate": 1e-06, + "loss": 0.0269, + "num_tokens": 199321965.0, + "reward": 0.62890625, + "reward_std": 0.3075515925884247, + "rewards/simpleverify_reward/mean": 0.62890625, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 456, + "tools/generated_tokens": 4414.0390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1311.28515625, + "completions/mean_terminated_length": 1154.1658935546875, + "completions/min_length": 272.0, + "completions/min_terminated_length": 272.0, + "entropy": 0.255185229703784, + "epoch": 0.07787505058896202, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14567583799362183, + "learning_rate": 1e-06, + "loss": 0.0293, + "num_tokens": 199748310.0, + "reward": 0.4609375, + "reward_std": 0.24079477787017822, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 457, + "tools/generated_tokens": 4831.2890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1481.765625, + "completions/mean_terminated_length": 1233.6517333984375, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.2644712319597602, + "epoch": 0.07804545551366435, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16193120181560516, + "learning_rate": 1e-06, + "loss": 0.0378, + "num_tokens": 200209066.0, + "reward": 0.3828125, + "reward_std": 0.246619313955307, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 458, + "tools/generated_tokens": 5169.7734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1182.06640625, + "completions/mean_terminated_length": 1071.4404296875, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.2632291382178664, + "epoch": 0.07821586043836667, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15308211743831635, + "learning_rate": 1e-06, + "loss": 0.0005, + "num_tokens": 200590059.0, + "reward": 0.57421875, + "reward_std": 0.22797390818595886, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 459, + "tools/generated_tokens": 3726.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1369.81640625, + "completions/mean_terminated_length": 1188.5296630859375, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.25467612966895103, + "epoch": 0.07838626536306899, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14648422598838806, + "learning_rate": 1e-06, + "loss": 0.0222, + "num_tokens": 201017660.0, + "reward": 0.5, + "reward_std": 0.22765710949897766, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 460, + "tools/generated_tokens": 4697.83203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1451.5390625, + "completions/mean_terminated_length": 1252.71875, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "entropy": 0.2917803544551134, + "epoch": 0.07855667028777132, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.09677249938249588, + "learning_rate": 1e-06, + "loss": 0.0202, + "num_tokens": 201466006.0, + "reward": 0.51171875, + "reward_std": 0.10881631076335907, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 461, + "tools/generated_tokens": 4659.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1271.828125, + "completions/mean_terminated_length": 1160.946533203125, + "completions/min_length": 270.0, + "completions/min_terminated_length": 270.0, + "entropy": 0.25832536444067955, + "epoch": 0.07872707521247364, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.122776098549366, + "learning_rate": 1e-06, + "loss": 0.0221, + "num_tokens": 201873818.0, + "reward": 0.4140625, + "reward_std": 0.15984314680099487, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 462, + "tools/generated_tokens": 4095.8515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1353.0546875, + "completions/mean_terminated_length": 1228.1658935546875, + "completions/min_length": 129.0, + "completions/min_terminated_length": 129.0, + "entropy": 0.254767038859427, + "epoch": 0.07889748013717597, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1598517894744873, + "learning_rate": 1e-06, + "loss": 0.015, + "num_tokens": 202298888.0, + "reward": 0.58203125, + "reward_std": 0.28796231746673584, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 463, + "tools/generated_tokens": 4529.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1132.15625, + "completions/mean_terminated_length": 1083.160400390625, + "completions/min_length": 319.0, + "completions/min_terminated_length": 319.0, + "entropy": 0.25874380860477686, + "epoch": 0.07906788506187828, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16896232962608337, + "learning_rate": 1e-06, + "loss": 0.0008, + "num_tokens": 202664832.0, + "reward": 0.54296875, + "reward_std": 0.2746518850326538, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 464, + "tools/generated_tokens": 3596.16015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1342.31640625, + "completions/mean_terminated_length": 1237.887939453125, + "completions/min_length": 401.0, + "completions/min_terminated_length": 401.0, + "entropy": 0.29393049143254757, + "epoch": 0.07923828998658061, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17599904537200928, + "learning_rate": 1e-06, + "loss": 0.021, + "num_tokens": 203086417.0, + "reward": 0.40625, + "reward_std": 0.33642083406448364, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 465, + "tools/generated_tokens": 4462.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1273.578125, + "completions/mean_terminated_length": 1166.8800048828125, + "completions/min_length": 205.0, + "completions/min_terminated_length": 205.0, + "entropy": 0.27936690114438534, + "epoch": 0.07940869491128294, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17370912432670593, + "learning_rate": 1e-06, + "loss": 0.0063, + "num_tokens": 203485589.0, + "reward": 0.41796875, + "reward_std": 0.22044281661510468, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 466, + "tools/generated_tokens": 3977.59375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1350.75, + "completions/mean_terminated_length": 1209.9906005859375, + "completions/min_length": 107.0, + "completions/min_terminated_length": 107.0, + "entropy": 0.2872124407440424, + "epoch": 0.07957909983598525, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19319450855255127, + "learning_rate": 1e-06, + "loss": -0.0007, + "num_tokens": 203908805.0, + "reward": 0.53515625, + "reward_std": 0.3216220736503601, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 467, + "tools/generated_tokens": 4550.75390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1389.359375, + "completions/mean_terminated_length": 1248.8909912109375, + "completions/min_length": 421.0, + "completions/min_terminated_length": 421.0, + "entropy": 0.2329869018867612, + "epoch": 0.07974950476068758, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17986977100372314, + "learning_rate": 1e-06, + "loss": 0.0259, + "num_tokens": 204350673.0, + "reward": 0.5703125, + "reward_std": 0.31593742966651917, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 468, + "tools/generated_tokens": 4605.3671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1333.98828125, + "completions/mean_terminated_length": 1220.9140625, + "completions/min_length": 250.0, + "completions/min_terminated_length": 250.0, + "entropy": 0.28531682677567005, + "epoch": 0.07991990968538991, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18188199400901794, + "learning_rate": 1e-06, + "loss": 0.0017, + "num_tokens": 204771134.0, + "reward": 0.390625, + "reward_std": 0.20597386360168457, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 469, + "tools/generated_tokens": 4525.99609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1306.9140625, + "completions/mean_terminated_length": 1197.251220703125, + "completions/min_length": 218.0, + "completions/min_terminated_length": 218.0, + "entropy": 0.28891819529235363, + "epoch": 0.08009031461009224, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13076718151569366, + "learning_rate": 1e-06, + "loss": -0.0272, + "num_tokens": 205190696.0, + "reward": 0.33203125, + "reward_std": 0.16746041178703308, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 470, + "tools/generated_tokens": 4394.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1403.59765625, + "completions/mean_terminated_length": 1227.2686767578125, + "completions/min_length": 115.0, + "completions/min_terminated_length": 115.0, + "entropy": 0.27110878843814135, + "epoch": 0.08026071953479455, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15282106399536133, + "learning_rate": 1e-06, + "loss": 0.0136, + "num_tokens": 205632289.0, + "reward": 0.4453125, + "reward_std": 0.2556079626083374, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 471, + "tools/generated_tokens": 4979.60546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.74609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1357.50390625, + "completions/mean_terminated_length": 1117.657958984375, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.29090402089059353, + "epoch": 0.08043112445949688, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16128665208816528, + "learning_rate": 1e-06, + "loss": 0.003, + "num_tokens": 206064386.0, + "reward": 0.296875, + "reward_std": 0.2601088881492615, + "rewards/simpleverify_reward/mean": 0.296875, + "rewards/simpleverify_reward/std": 0.45777595043182373, + "step": 472, + "tools/generated_tokens": 4989.51171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1240.890625, + "completions/mean_terminated_length": 1190.6556396484375, + "completions/min_length": 251.0, + "completions/min_terminated_length": 251.0, + "entropy": 0.26050027180463076, + "epoch": 0.0806015293841992, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1341354250907898, + "learning_rate": 1e-06, + "loss": 0.0062, + "num_tokens": 206456918.0, + "reward": 0.61328125, + "reward_std": 0.20685215294361115, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 473, + "tools/generated_tokens": 3656.90625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1108.60546875, + "completions/mean_terminated_length": 1045.979248046875, + "completions/min_length": 146.0, + "completions/min_terminated_length": 146.0, + "entropy": 0.28258791379630566, + "epoch": 0.08077193430890153, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17450235784053802, + "learning_rate": 1e-06, + "loss": 0.0104, + "num_tokens": 206825489.0, + "reward": 0.42578125, + "reward_std": 0.26895391941070557, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 474, + "tools/generated_tokens": 4012.61328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1233.0859375, + "completions/mean_terminated_length": 1213.528076171875, + "completions/min_length": 148.0, + "completions/min_terminated_length": 148.0, + "entropy": 0.2500674147158861, + "epoch": 0.08094233923360385, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16132110357284546, + "learning_rate": 1e-06, + "loss": -0.0027, + "num_tokens": 207212983.0, + "reward": 0.6953125, + "reward_std": 0.19828036427497864, + "rewards/simpleverify_reward/mean": 0.6953125, + "rewards/simpleverify_reward/std": 0.4611765742301941, + "step": 475, + "tools/generated_tokens": 3041.109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.8828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1251.67578125, + "completions/mean_terminated_length": 1165.4935302734375, + "completions/min_length": 171.0, + "completions/min_terminated_length": 171.0, + "entropy": 0.22036410216242075, + "epoch": 0.08111274415830617, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13216455280780792, + "learning_rate": 1e-06, + "loss": 0.0018, + "num_tokens": 207598996.0, + "reward": 0.6328125, + "reward_std": 0.14635254442691803, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 476, + "tools/generated_tokens": 3235.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.96875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1377.046875, + "completions/mean_terminated_length": 1222.2164306640625, + "completions/min_length": 166.0, + "completions/min_terminated_length": 166.0, + "entropy": 0.29438223876059055, + "epoch": 0.0812831490830085, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12111736834049225, + "learning_rate": 1e-06, + "loss": 0.0208, + "num_tokens": 208025472.0, + "reward": 0.5, + "reward_std": 0.14139671623706818, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 477, + "tools/generated_tokens": 4449.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1309.7265625, + "completions/mean_terminated_length": 1185.0, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.2841954305768013, + "epoch": 0.08145355400771083, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.19221487641334534, + "learning_rate": 1e-06, + "loss": 0.0023, + "num_tokens": 208446010.0, + "reward": 0.60546875, + "reward_std": 0.29445403814315796, + "rewards/simpleverify_reward/mean": 0.60546875, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 478, + "tools/generated_tokens": 4565.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1241.0390625, + "completions/mean_terminated_length": 1108.9908447265625, + "completions/min_length": 62.0, + "completions/min_terminated_length": 62.0, + "entropy": 0.28579610772430897, + "epoch": 0.08162395893241314, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1440448760986328, + "learning_rate": 1e-06, + "loss": 0.0337, + "num_tokens": 208845780.0, + "reward": 0.5390625, + "reward_std": 0.23425540328025818, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 479, + "tools/generated_tokens": 4241.04296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1203.67578125, + "completions/mean_terminated_length": 1091.5972900390625, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.2577935494482517, + "epoch": 0.08179436385711547, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14196252822875977, + "learning_rate": 1e-06, + "loss": 0.0175, + "num_tokens": 209231537.0, + "reward": 0.42578125, + "reward_std": 0.16461142897605896, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 480, + "tools/generated_tokens": 3891.671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1166.56640625, + "completions/mean_terminated_length": 1107.80419921875, + "completions/min_length": 253.0, + "completions/min_terminated_length": 253.0, + "entropy": 0.23823885526508093, + "epoch": 0.0819647687818178, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15863071382045746, + "learning_rate": 1e-06, + "loss": 0.007, + "num_tokens": 209620642.0, + "reward": 0.5234375, + "reward_std": 0.25263863801956177, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 481, + "tools/generated_tokens": 3574.578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.17578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1248.40625, + "completions/mean_terminated_length": 1134.1785888671875, + "completions/min_length": 238.0, + "completions/min_terminated_length": 238.0, + "entropy": 0.29295533522963524, + "epoch": 0.08213517370652011, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16509659588336945, + "learning_rate": 1e-06, + "loss": 0.0114, + "num_tokens": 210017354.0, + "reward": 0.5, + "reward_std": 0.20294174551963806, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 482, + "tools/generated_tokens": 4112.46484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1316.91015625, + "completions/mean_terminated_length": 1227.127197265625, + "completions/min_length": 404.0, + "completions/min_terminated_length": 404.0, + "entropy": 0.23557352274656296, + "epoch": 0.08230557863122244, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1472439020872116, + "learning_rate": 1e-06, + "loss": -0.0008, + "num_tokens": 210426931.0, + "reward": 0.52734375, + "reward_std": 0.19322282075881958, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 483, + "tools/generated_tokens": 3716.9140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1284.421875, + "completions/mean_terminated_length": 1216.187255859375, + "completions/min_length": 235.0, + "completions/min_terminated_length": 235.0, + "entropy": 0.2567291585728526, + "epoch": 0.08247598355592477, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17313428223133087, + "learning_rate": 1e-06, + "loss": 0.0085, + "num_tokens": 210825343.0, + "reward": 0.48046875, + "reward_std": 0.2983798384666443, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 484, + "tools/generated_tokens": 3572.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1321.9375, + "completions/mean_terminated_length": 1175.3662109375, + "completions/min_length": 57.0, + "completions/min_terminated_length": 57.0, + "entropy": 0.2825562469661236, + "epoch": 0.0826463884806271, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16595816612243652, + "learning_rate": 1e-06, + "loss": 0.009, + "num_tokens": 211246735.0, + "reward": 0.49609375, + "reward_std": 0.2654259204864502, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 485, + "tools/generated_tokens": 4417.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1374.1171875, + "completions/mean_terminated_length": 1284.677001953125, + "completions/min_length": 189.0, + "completions/min_terminated_length": 189.0, + "entropy": 0.2517801756039262, + "epoch": 0.08281679340532941, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14134158194065094, + "learning_rate": 1e-06, + "loss": 0.0198, + "num_tokens": 211678525.0, + "reward": 0.515625, + "reward_std": 0.25053930282592773, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 486, + "tools/generated_tokens": 4654.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1369.859375, + "completions/mean_terminated_length": 1232.96240234375, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.24507506284862757, + "epoch": 0.08298719833003174, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.12178443372249603, + "learning_rate": 1e-06, + "loss": -0.0033, + "num_tokens": 212114009.0, + "reward": 0.4765625, + "reward_std": 0.2162114828824997, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 487, + "tools/generated_tokens": 4409.87890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2007.0, + "completions/mean_length": 1294.640625, + "completions/mean_terminated_length": 1155.129638671875, + "completions/min_length": 114.0, + "completions/min_terminated_length": 114.0, + "entropy": 0.30812946148216724, + "epoch": 0.08315760325473406, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.1886834055185318, + "learning_rate": 1e-06, + "loss": 0.0092, + "num_tokens": 212552525.0, + "reward": 0.40234375, + "reward_std": 0.32440072298049927, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 488, + "tools/generated_tokens": 4702.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1274.91015625, + "completions/mean_terminated_length": 1140.1513671875, + "completions/min_length": 181.0, + "completions/min_terminated_length": 181.0, + "entropy": 0.26899847388267517, + "epoch": 0.08332800817943639, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12434171885251999, + "learning_rate": 1e-06, + "loss": 0.006, + "num_tokens": 212959094.0, + "reward": 0.48046875, + "reward_std": 0.12412451207637787, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 489, + "tools/generated_tokens": 4018.9140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.33984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1238.53515625, + "completions/mean_terminated_length": 1143.0960693359375, + "completions/min_length": 126.0, + "completions/min_terminated_length": 126.0, + "entropy": 0.2552452450618148, + "epoch": 0.0834984131041387, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17591121792793274, + "learning_rate": 1e-06, + "loss": 0.0046, + "num_tokens": 213349135.0, + "reward": 0.4375, + "reward_std": 0.24153748154640198, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 490, + "tools/generated_tokens": 3734.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.21875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1377.8515625, + "completions/mean_terminated_length": 1190.2099609375, + "completions/min_length": 159.0, + "completions/min_terminated_length": 159.0, + "entropy": 0.26415817346423864, + "epoch": 0.08366881802884103, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.31660452485084534, + "learning_rate": 1e-06, + "loss": 0.0221, + "num_tokens": 213779177.0, + "reward": 0.4609375, + "reward_std": 0.2829289138317108, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 491, + "tools/generated_tokens": 4633.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1330.4609375, + "completions/mean_terminated_length": 1164.875, + "completions/min_length": 309.0, + "completions/min_terminated_length": 309.0, + "entropy": 0.2483479054644704, + "epoch": 0.08383922295354336, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.16323445737361908, + "learning_rate": 1e-06, + "loss": 0.0353, + "num_tokens": 214203935.0, + "reward": 0.55078125, + "reward_std": 0.3387996554374695, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 492, + "tools/generated_tokens": 4642.46484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1271.5, + "completions/mean_terminated_length": 1156.596435546875, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.2604938466101885, + "epoch": 0.08400962787824569, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.2003244161605835, + "learning_rate": 1e-06, + "loss": 0.0277, + "num_tokens": 214614687.0, + "reward": 0.57421875, + "reward_std": 0.30289244651794434, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 493, + "tools/generated_tokens": 4375.51171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1401.3671875, + "completions/mean_terminated_length": 1288.6558837890625, + "completions/min_length": 412.0, + "completions/min_terminated_length": 412.0, + "entropy": 0.2736070640385151, + "epoch": 0.084180032802948, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18048396706581116, + "learning_rate": 1e-06, + "loss": 0.0188, + "num_tokens": 215059581.0, + "reward": 0.48046875, + "reward_std": 0.3220744729042053, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 494, + "tools/generated_tokens": 4505.39453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1332.9453125, + "completions/mean_terminated_length": 1159.3931884765625, + "completions/min_length": 348.0, + "completions/min_terminated_length": 348.0, + "entropy": 0.2770430566743016, + "epoch": 0.08435043772765033, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15728560090065002, + "learning_rate": 1e-06, + "loss": 0.0319, + "num_tokens": 215481007.0, + "reward": 0.3984375, + "reward_std": 0.2245136797428131, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 495, + "tools/generated_tokens": 4612.9609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1418.55859375, + "completions/mean_terminated_length": 1234.1767578125, + "completions/min_length": 180.0, + "completions/min_terminated_length": 180.0, + "entropy": 0.247409513220191, + "epoch": 0.08452084265235266, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11603175848722458, + "learning_rate": 1e-06, + "loss": 0.0355, + "num_tokens": 215923390.0, + "reward": 0.3828125, + "reward_std": 0.1854248344898224, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 496, + "tools/generated_tokens": 4498.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.50390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.03515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1181.125, + "completions/mean_terminated_length": 1149.5384521484375, + "completions/min_length": 191.0, + "completions/min_terminated_length": 191.0, + "entropy": 0.24172541499137878, + "epoch": 0.08469124757705497, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1781323105096817, + "learning_rate": 1e-06, + "loss": -0.0071, + "num_tokens": 216310734.0, + "reward": 0.4296875, + "reward_std": 0.2766585052013397, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 497, + "tools/generated_tokens": 3885.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1421.66796875, + "completions/mean_terminated_length": 1280.818115234375, + "completions/min_length": 95.0, + "completions/min_terminated_length": 95.0, + "entropy": 0.25911517534404993, + "epoch": 0.0848616525017573, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14875948429107666, + "learning_rate": 1e-06, + "loss": 0.0288, + "num_tokens": 216754537.0, + "reward": 0.484375, + "reward_std": 0.2211625725030899, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 498, + "tools/generated_tokens": 4517.66796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1181.16015625, + "completions/mean_terminated_length": 1119.5062255859375, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.2653562109917402, + "epoch": 0.08503205742645963, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18100285530090332, + "learning_rate": 1e-06, + "loss": 0.0382, + "num_tokens": 217134706.0, + "reward": 0.640625, + "reward_std": 0.23568323254585266, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 499, + "tools/generated_tokens": 3621.17578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1359.6796875, + "completions/mean_terminated_length": 1228.4232177734375, + "completions/min_length": 332.0, + "completions/min_terminated_length": 332.0, + "entropy": 0.25509117916226387, + "epoch": 0.08520246235116195, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15384499728679657, + "learning_rate": 1e-06, + "loss": 0.0148, + "num_tokens": 217567216.0, + "reward": 0.47265625, + "reward_std": 0.20348459482192993, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 500, + "tools/generated_tokens": 4735.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1266.765625, + "completions/mean_terminated_length": 1170.8245849609375, + "completions/min_length": 247.0, + "completions/min_terminated_length": 247.0, + "entropy": 0.27620676439255476, + "epoch": 0.08537286727586427, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20635609328746796, + "learning_rate": 1e-06, + "loss": 0.0132, + "num_tokens": 217969636.0, + "reward": 0.59375, + "reward_std": 0.33109644055366516, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 501, + "tools/generated_tokens": 4042.76171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1342.703125, + "completions/mean_terminated_length": 1262.973876953125, + "completions/min_length": 171.0, + "completions/min_terminated_length": 171.0, + "entropy": 0.22437008377164602, + "epoch": 0.0855432722005666, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15379106998443604, + "learning_rate": 1e-06, + "loss": 0.008, + "num_tokens": 218394248.0, + "reward": 0.59375, + "reward_std": 0.2731332778930664, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 502, + "tools/generated_tokens": 3902.71875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1311.43359375, + "completions/mean_terminated_length": 1065.9114990234375, + "completions/min_length": 290.0, + "completions/min_terminated_length": 290.0, + "entropy": 0.29152974020689726, + "epoch": 0.08571367712526892, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16427302360534668, + "learning_rate": 1e-06, + "loss": 0.0025, + "num_tokens": 218821463.0, + "reward": 0.390625, + "reward_std": 0.21940404176712036, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 503, + "tools/generated_tokens": 5111.4375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.85546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1223.25390625, + "completions/mean_terminated_length": 1109.626708984375, + "completions/min_length": 84.0, + "completions/min_terminated_length": 84.0, + "entropy": 0.30673097632825375, + "epoch": 0.08588408204997125, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15234944224357605, + "learning_rate": 1e-06, + "loss": 0.0004, + "num_tokens": 219224344.0, + "reward": 0.3671875, + "reward_std": 0.20818254351615906, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 504, + "tools/generated_tokens": 4231.26171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1348.7265625, + "completions/mean_terminated_length": 1139.2994384765625, + "completions/min_length": 279.0, + "completions/min_terminated_length": 279.0, + "entropy": 0.29540538880974054, + "epoch": 0.08605448697467356, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17678800225257874, + "learning_rate": 1e-06, + "loss": 0.0269, + "num_tokens": 219661154.0, + "reward": 0.46875, + "reward_std": 0.18739622831344604, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 505, + "tools/generated_tokens": 4868.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1341.7109375, + "completions/mean_terminated_length": 1178.72119140625, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.29668791219592094, + "epoch": 0.08622489189937589, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15114258229732513, + "learning_rate": 1e-06, + "loss": 0.0125, + "num_tokens": 220086680.0, + "reward": 0.484375, + "reward_std": 0.1959541141986847, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 506, + "tools/generated_tokens": 4781.7265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1269.5234375, + "completions/mean_terminated_length": 1138.0045166015625, + "completions/min_length": 185.0, + "completions/min_terminated_length": 185.0, + "entropy": 0.3113073166459799, + "epoch": 0.08639529682407822, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13736459612846375, + "learning_rate": 1e-06, + "loss": 0.0316, + "num_tokens": 220493294.0, + "reward": 0.39453125, + "reward_std": 0.17570874094963074, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 507, + "tools/generated_tokens": 4709.5234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1381.125, + "completions/mean_terminated_length": 1257.629638671875, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.27220352552831173, + "epoch": 0.08656570174878055, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17900468409061432, + "learning_rate": 1e-06, + "loss": -0.001, + "num_tokens": 220936398.0, + "reward": 0.38671875, + "reward_std": 0.3298344314098358, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 508, + "tools/generated_tokens": 4741.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1280.109375, + "completions/mean_terminated_length": 1107.42578125, + "completions/min_length": 76.0, + "completions/min_terminated_length": 76.0, + "entropy": 0.2506442693993449, + "epoch": 0.08673610667348286, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14439967274665833, + "learning_rate": 1e-06, + "loss": 0.0173, + "num_tokens": 221339642.0, + "reward": 0.6015625, + "reward_std": 0.2498009204864502, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 509, + "tools/generated_tokens": 4032.109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1274.8203125, + "completions/mean_terminated_length": 1082.47314453125, + "completions/min_length": 209.0, + "completions/min_terminated_length": 209.0, + "entropy": 0.24651102907955647, + "epoch": 0.08690651159818519, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1554550975561142, + "learning_rate": 1e-06, + "loss": 0.0297, + "num_tokens": 221744636.0, + "reward": 0.32421875, + "reward_std": 0.2420850694179535, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 510, + "tools/generated_tokens": 4362.8203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1379.84765625, + "completions/mean_terminated_length": 1175.3162841796875, + "completions/min_length": 263.0, + "completions/min_terminated_length": 263.0, + "entropy": 0.2568117417395115, + "epoch": 0.08707691652288752, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18768467009067535, + "learning_rate": 1e-06, + "loss": 0.0504, + "num_tokens": 222183605.0, + "reward": 0.44140625, + "reward_std": 0.2728801667690277, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 511, + "tools/generated_tokens": 5155.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.84375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1418.23046875, + "completions/mean_terminated_length": 1110.6685791015625, + "completions/min_length": 257.0, + "completions/min_terminated_length": 257.0, + "entropy": 0.22845259215682745, + "epoch": 0.08724732144758983, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1267181634902954, + "learning_rate": 1e-06, + "loss": 0.0095, + "num_tokens": 222628096.0, + "reward": 0.44921875, + "reward_std": 0.18362826108932495, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 512, + "tools/generated_tokens": 5154.23046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.82421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1265.6015625, + "completions/mean_terminated_length": 1107.652587890625, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.2324536293745041, + "epoch": 0.08741772637229216, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17009258270263672, + "learning_rate": 1e-06, + "loss": 0.0211, + "num_tokens": 223030538.0, + "reward": 0.734375, + "reward_std": 0.21204319596290588, + "rewards/simpleverify_reward/mean": 0.734375, + "rewards/simpleverify_reward/std": 0.4425306022167206, + "step": 513, + "tools/generated_tokens": 4585.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1289.421875, + "completions/mean_terminated_length": 1157.2017822265625, + "completions/min_length": 281.0, + "completions/min_terminated_length": 281.0, + "entropy": 0.2680962225422263, + "epoch": 0.08758813129699448, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17256851494312286, + "learning_rate": 1e-06, + "loss": -0.0061, + "num_tokens": 223446518.0, + "reward": 0.5390625, + "reward_std": 0.31664419174194336, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 514, + "tools/generated_tokens": 4801.4375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1303.2109375, + "completions/mean_terminated_length": 1157.037353515625, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.29077679850161076, + "epoch": 0.08775853622169681, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1605779081583023, + "learning_rate": 1e-06, + "loss": 0.031, + "num_tokens": 223857244.0, + "reward": 0.54296875, + "reward_std": 0.23087677359580994, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 515, + "tools/generated_tokens": 4199.21484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1261.7734375, + "completions/mean_terminated_length": 1184.1630859375, + "completions/min_length": 321.0, + "completions/min_terminated_length": 321.0, + "entropy": 0.26548791863024235, + "epoch": 0.08792894114639913, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17304372787475586, + "learning_rate": 1e-06, + "loss": 0.0364, + "num_tokens": 224258930.0, + "reward": 0.625, + "reward_std": 0.28111931681632996, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 516, + "tools/generated_tokens": 4133.76953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1248.0859375, + "completions/mean_terminated_length": 1091.0933837890625, + "completions/min_length": 168.0, + "completions/min_terminated_length": 168.0, + "entropy": 0.30637590028345585, + "epoch": 0.08809934607110145, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18615835905075073, + "learning_rate": 1e-06, + "loss": 0.0191, + "num_tokens": 224662584.0, + "reward": 0.5234375, + "reward_std": 0.20882563292980194, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 517, + "tools/generated_tokens": 4288.08984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.04296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1245.0, + "completions/mean_terminated_length": 1208.950927734375, + "completions/min_length": 210.0, + "completions/min_terminated_length": 210.0, + "entropy": 0.2723206877708435, + "epoch": 0.08826975099580378, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13748545944690704, + "learning_rate": 1e-06, + "loss": -0.0147, + "num_tokens": 225068904.0, + "reward": 0.55078125, + "reward_std": 0.19084002077579498, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 518, + "tools/generated_tokens": 4053.00390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1372.375, + "completions/mean_terminated_length": 1243.534912109375, + "completions/min_length": 237.0, + "completions/min_terminated_length": 237.0, + "entropy": 0.26467883214354515, + "epoch": 0.08844015592050611, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15883135795593262, + "learning_rate": 1e-06, + "loss": 0.0265, + "num_tokens": 225498264.0, + "reward": 0.62109375, + "reward_std": 0.23502905666828156, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 519, + "tools/generated_tokens": 4444.375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1420.25390625, + "completions/mean_terminated_length": 1244.4949951171875, + "completions/min_length": 303.0, + "completions/min_terminated_length": 303.0, + "entropy": 0.22691770363599062, + "epoch": 0.08861056084520842, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1373693346977234, + "learning_rate": 1e-06, + "loss": 0.0071, + "num_tokens": 225942569.0, + "reward": 0.5625, + "reward_std": 0.2284531146287918, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 520, + "tools/generated_tokens": 4340.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.42578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1351.48046875, + "completions/mean_terminated_length": 1173.936279296875, + "completions/min_length": 160.0, + "completions/min_terminated_length": 160.0, + "entropy": 0.2516844943165779, + "epoch": 0.08878096576991075, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.159901425242424, + "learning_rate": 1e-06, + "loss": 0.0193, + "num_tokens": 226375924.0, + "reward": 0.50390625, + "reward_std": 0.3148850202560425, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 521, + "tools/generated_tokens": 4591.48828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1325.875, + "completions/mean_terminated_length": 1167.6953125, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "entropy": 0.277279500849545, + "epoch": 0.08895137069461308, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15710198879241943, + "learning_rate": 1e-06, + "loss": 0.0055, + "num_tokens": 226796996.0, + "reward": 0.453125, + "reward_std": 0.1650887131690979, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 522, + "tools/generated_tokens": 4533.890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1365.37890625, + "completions/mean_terminated_length": 1242.69580078125, + "completions/min_length": 164.0, + "completions/min_terminated_length": 164.0, + "entropy": 0.2397918114438653, + "epoch": 0.0891217756193154, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15880653262138367, + "learning_rate": 1e-06, + "loss": 0.0034, + "num_tokens": 227216117.0, + "reward": 0.515625, + "reward_std": 0.24956358969211578, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 523, + "tools/generated_tokens": 3845.38671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2017.0, + "completions/mean_length": 1225.85546875, + "completions/mean_terminated_length": 1120.83251953125, + "completions/min_length": 158.0, + "completions/min_terminated_length": 158.0, + "entropy": 0.20685587171465158, + "epoch": 0.08929218054401772, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12243921309709549, + "learning_rate": 1e-06, + "loss": 0.0378, + "num_tokens": 227601408.0, + "reward": 0.6796875, + "reward_std": 0.22662898898124695, + "rewards/simpleverify_reward/mean": 0.6796875, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 524, + "tools/generated_tokens": 3705.859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1337.88671875, + "completions/mean_terminated_length": 1217.92236328125, + "completions/min_length": 125.0, + "completions/min_terminated_length": 125.0, + "entropy": 0.30382856726646423, + "epoch": 0.08946258546872005, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14467647671699524, + "learning_rate": 1e-06, + "loss": -0.0024, + "num_tokens": 228023443.0, + "reward": 0.359375, + "reward_std": 0.20192813873291016, + "rewards/simpleverify_reward/mean": 0.359375, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 525, + "tools/generated_tokens": 4401.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1220.4609375, + "completions/mean_terminated_length": 1089.4027099609375, + "completions/min_length": 260.0, + "completions/min_terminated_length": 260.0, + "entropy": 0.229482333175838, + "epoch": 0.08963299039342237, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1602201908826828, + "learning_rate": 1e-06, + "loss": 0.0291, + "num_tokens": 228406825.0, + "reward": 0.33984375, + "reward_std": 0.18903234601020813, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 526, + "tools/generated_tokens": 3828.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1407.58203125, + "completions/mean_terminated_length": 1224.15576171875, + "completions/min_length": 160.0, + "completions/min_terminated_length": 160.0, + "entropy": 0.2706261845305562, + "epoch": 0.08980339531812469, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16631732881069183, + "learning_rate": 1e-06, + "loss": 0.009, + "num_tokens": 228855758.0, + "reward": 0.48828125, + "reward_std": 0.2673723101615906, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 527, + "tools/generated_tokens": 4951.58203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1294.7890625, + "completions/mean_terminated_length": 1134.156494140625, + "completions/min_length": 277.0, + "completions/min_terminated_length": 277.0, + "entropy": 0.22482021152973175, + "epoch": 0.08997380024282702, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.178030326962471, + "learning_rate": 1e-06, + "loss": 0.0273, + "num_tokens": 229272232.0, + "reward": 0.73828125, + "reward_std": 0.29274123907089233, + "rewards/simpleverify_reward/mean": 0.73828125, + "rewards/simpleverify_reward/std": 0.4404313564300537, + "step": 528, + "tools/generated_tokens": 4430.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1346.6328125, + "completions/mean_terminated_length": 1176.4078369140625, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.24884235206991434, + "epoch": 0.09014420516752934, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12760142982006073, + "learning_rate": 1e-06, + "loss": -0.0013, + "num_tokens": 229693562.0, + "reward": 0.62890625, + "reward_std": 0.18332535028457642, + "rewards/simpleverify_reward/mean": 0.62890625, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 529, + "tools/generated_tokens": 4154.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1456.2890625, + "completions/mean_terminated_length": 1246.529052734375, + "completions/min_length": 332.0, + "completions/min_terminated_length": 332.0, + "entropy": 0.24968896806240082, + "epoch": 0.09031461009223167, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.17811577022075653, + "learning_rate": 1e-06, + "loss": 0.0058, + "num_tokens": 230152708.0, + "reward": 0.34765625, + "reward_std": 0.3287818431854248, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 530, + "tools/generated_tokens": 5256.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.85546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1370.6953125, + "completions/mean_terminated_length": 1256.2647705078125, + "completions/min_length": 212.0, + "completions/min_terminated_length": 212.0, + "entropy": 0.26154812704771757, + "epoch": 0.09048501501693398, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.11092137545347214, + "learning_rate": 1e-06, + "loss": 0.0068, + "num_tokens": 230576262.0, + "reward": 0.59765625, + "reward_std": 0.15151193737983704, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 531, + "tools/generated_tokens": 4050.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1431.92578125, + "completions/mean_terminated_length": 1263.3531494140625, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.26634883414953947, + "epoch": 0.09065541994163631, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17712172865867615, + "learning_rate": 1e-06, + "loss": 0.0237, + "num_tokens": 231030451.0, + "reward": 0.56640625, + "reward_std": 0.2361604869365692, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 532, + "tools/generated_tokens": 4807.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1407.33203125, + "completions/mean_terminated_length": 1202.587646484375, + "completions/min_length": 272.0, + "completions/min_terminated_length": 272.0, + "entropy": 0.252150890417397, + "epoch": 0.09082582486633864, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11143050342798233, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 231472984.0, + "reward": 0.41796875, + "reward_std": 0.17263562977313995, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 533, + "tools/generated_tokens": 4871.34765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2007.0, + "completions/mean_length": 1298.1875, + "completions/mean_terminated_length": 1120.6956787109375, + "completions/min_length": 205.0, + "completions/min_terminated_length": 205.0, + "entropy": 0.24661609530448914, + "epoch": 0.09099622979104097, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18985028564929962, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 231877496.0, + "reward": 0.6171875, + "reward_std": 0.2607851028442383, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 534, + "tools/generated_tokens": 4210.19140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1492.8359375, + "completions/mean_terminated_length": 1249.561767578125, + "completions/min_length": 151.0, + "completions/min_terminated_length": 151.0, + "entropy": 0.2084982069209218, + "epoch": 0.09116663471574328, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13431037962436676, + "learning_rate": 1e-06, + "loss": -0.0007, + "num_tokens": 232331982.0, + "reward": 0.55859375, + "reward_std": 0.14171826839447021, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 535, + "tools/generated_tokens": 4764.8359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2013.0, + "completions/mean_length": 1331.87109375, + "completions/mean_terminated_length": 1112.64794921875, + "completions/min_length": 80.0, + "completions/min_terminated_length": 80.0, + "entropy": 0.29038824141025543, + "epoch": 0.09133703964044561, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16294419765472412, + "learning_rate": 1e-06, + "loss": 0.0589, + "num_tokens": 232762781.0, + "reward": 0.5234375, + "reward_std": 0.28491154313087463, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 536, + "tools/generated_tokens": 5019.875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1433.65234375, + "completions/mean_terminated_length": 1277.058837890625, + "completions/min_length": 187.0, + "completions/min_terminated_length": 187.0, + "entropy": 0.2674070904031396, + "epoch": 0.09150744456514794, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.10762283205986023, + "learning_rate": 1e-06, + "loss": 0.0084, + "num_tokens": 233212468.0, + "reward": 0.54296875, + "reward_std": 0.14274312555789948, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 537, + "tools/generated_tokens": 4713.66015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2012.0, + "completions/mean_length": 1267.76953125, + "completions/mean_terminated_length": 1064.0738525390625, + "completions/min_length": 137.0, + "completions/min_terminated_length": 137.0, + "entropy": 0.24824349116533995, + "epoch": 0.09167784948985026, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18577617406845093, + "learning_rate": 1e-06, + "loss": -0.0343, + "num_tokens": 233626473.0, + "reward": 0.58984375, + "reward_std": 0.3347148895263672, + "rewards/simpleverify_reward/mean": 0.58984375, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 538, + "tools/generated_tokens": 4747.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1400.86328125, + "completions/mean_terminated_length": 1243.791259765625, + "completions/min_length": 226.0, + "completions/min_terminated_length": 226.0, + "entropy": 0.25223646126687527, + "epoch": 0.09184825441455258, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14792688190937042, + "learning_rate": 1e-06, + "loss": 0.0154, + "num_tokens": 234070438.0, + "reward": 0.5078125, + "reward_std": 0.21080546081066132, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 539, + "tools/generated_tokens": 4536.875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1425.5703125, + "completions/mean_terminated_length": 1182.016357421875, + "completions/min_length": 199.0, + "completions/min_terminated_length": 199.0, + "entropy": 0.2843840243294835, + "epoch": 0.0920186593392549, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1389995813369751, + "learning_rate": 1e-06, + "loss": 0.0402, + "num_tokens": 234517000.0, + "reward": 0.38671875, + "reward_std": 0.2043357938528061, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 540, + "tools/generated_tokens": 5017.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1396.09765625, + "completions/mean_terminated_length": 1257.0711669921875, + "completions/min_length": 120.0, + "completions/min_terminated_length": 120.0, + "entropy": 0.23499319050461054, + "epoch": 0.09218906426395723, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13102610409259796, + "learning_rate": 1e-06, + "loss": 0.0186, + "num_tokens": 234945249.0, + "reward": 0.33984375, + "reward_std": 0.20775945484638214, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 541, + "tools/generated_tokens": 4012.10546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.27734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1371.35546875, + "completions/mean_terminated_length": 1168.7156982421875, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "entropy": 0.24373176600784063, + "epoch": 0.09235946918865955, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16179470717906952, + "learning_rate": 1e-06, + "loss": 0.0246, + "num_tokens": 235375692.0, + "reward": 0.56640625, + "reward_std": 0.18959103524684906, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 542, + "tools/generated_tokens": 4483.36328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1401.87109375, + "completions/mean_terminated_length": 1123.9329833984375, + "completions/min_length": 142.0, + "completions/min_terminated_length": 142.0, + "entropy": 0.24623981583863497, + "epoch": 0.09252987411336187, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17732244729995728, + "learning_rate": 1e-06, + "loss": 0.0108, + "num_tokens": 235816747.0, + "reward": 0.46484375, + "reward_std": 0.328233540058136, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 543, + "tools/generated_tokens": 5241.875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1517.2890625, + "completions/mean_terminated_length": 1347.7421875, + "completions/min_length": 186.0, + "completions/min_terminated_length": 186.0, + "entropy": 0.255928092636168, + "epoch": 0.0927002790380642, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13415896892547607, + "learning_rate": 1e-06, + "loss": 0.0056, + "num_tokens": 236285221.0, + "reward": 0.55859375, + "reward_std": 0.229627788066864, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 544, + "tools/generated_tokens": 4957.34375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1417.71484375, + "completions/mean_terminated_length": 1279.6524658203125, + "completions/min_length": 177.0, + "completions/min_terminated_length": 177.0, + "entropy": 0.28612892888486385, + "epoch": 0.09287068396276653, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1610061079263687, + "learning_rate": 1e-06, + "loss": 0.0256, + "num_tokens": 236731820.0, + "reward": 0.42578125, + "reward_std": 0.16926807165145874, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 545, + "tools/generated_tokens": 4377.71875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1361.08984375, + "completions/mean_terminated_length": 1214.59716796875, + "completions/min_length": 115.0, + "completions/min_terminated_length": 115.0, + "entropy": 0.2613782323896885, + "epoch": 0.09304108888746884, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.18205983936786652, + "learning_rate": 1e-06, + "loss": 0.0454, + "num_tokens": 237164915.0, + "reward": 0.61328125, + "reward_std": 0.34847772121429443, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 546, + "tools/generated_tokens": 4737.1015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1422.04296875, + "completions/mean_terminated_length": 1217.725341796875, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.21857111807912588, + "epoch": 0.09321149381217117, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.08905293047428131, + "learning_rate": 1e-06, + "loss": 0.0179, + "num_tokens": 237598270.0, + "reward": 0.4375, + "reward_std": 0.09011821448802948, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 547, + "tools/generated_tokens": 4366.046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1393.62890625, + "completions/mean_terminated_length": 1206.2060546875, + "completions/min_length": 261.0, + "completions/min_terminated_length": 261.0, + "entropy": 0.23382233548909426, + "epoch": 0.0933818987368735, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13700401782989502, + "learning_rate": 1e-06, + "loss": 0.001, + "num_tokens": 238030735.0, + "reward": 0.390625, + "reward_std": 0.20049379765987396, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 548, + "tools/generated_tokens": 4417.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1373.2265625, + "completions/mean_terminated_length": 1171.187744140625, + "completions/min_length": 158.0, + "completions/min_terminated_length": 158.0, + "entropy": 0.3008579695597291, + "epoch": 0.09355230366157583, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1843133270740509, + "learning_rate": 1e-06, + "loss": 0.0229, + "num_tokens": 238468393.0, + "reward": 0.58203125, + "reward_std": 0.2604767680168152, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 549, + "tools/generated_tokens": 4829.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1389.98828125, + "completions/mean_terminated_length": 1127.51904296875, + "completions/min_length": 235.0, + "completions/min_terminated_length": 235.0, + "entropy": 0.2668496873229742, + "epoch": 0.09372270858627814, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1654992401599884, + "learning_rate": 1e-06, + "loss": -0.0029, + "num_tokens": 238912774.0, + "reward": 0.43359375, + "reward_std": 0.3381253480911255, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 550, + "tools/generated_tokens": 5118.00390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1324.28515625, + "completions/mean_terminated_length": 1194.216552734375, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.25283054634928703, + "epoch": 0.09389311351098047, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13537749648094177, + "learning_rate": 1e-06, + "loss": -0.0239, + "num_tokens": 239338399.0, + "reward": 0.37890625, + "reward_std": 0.21953773498535156, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 551, + "tools/generated_tokens": 4316.3125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1362.66796875, + "completions/mean_terminated_length": 1166.371826171875, + "completions/min_length": 144.0, + "completions/min_terminated_length": 144.0, + "entropy": 0.2578463824465871, + "epoch": 0.0940635184356828, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1487479954957962, + "learning_rate": 1e-06, + "loss": 0.0015, + "num_tokens": 239762970.0, + "reward": 0.54296875, + "reward_std": 0.2552996277809143, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 552, + "tools/generated_tokens": 4442.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.50390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1414.5390625, + "completions/mean_terminated_length": 1268.3558349609375, + "completions/min_length": 300.0, + "completions/min_terminated_length": 300.0, + "entropy": 0.24511006101965904, + "epoch": 0.09423392336038512, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.11943556368350983, + "learning_rate": 1e-06, + "loss": 0.0163, + "num_tokens": 240209572.0, + "reward": 0.4375, + "reward_std": 0.18541164696216583, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 553, + "tools/generated_tokens": 4622.55078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1462.5, + "completions/mean_terminated_length": 1210.6424560546875, + "completions/min_length": 237.0, + "completions/min_terminated_length": 237.0, + "entropy": 0.2894366355612874, + "epoch": 0.09440432828508744, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15874801576137543, + "learning_rate": 1e-06, + "loss": -0.0072, + "num_tokens": 240663108.0, + "reward": 0.453125, + "reward_std": 0.2290801852941513, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 554, + "tools/generated_tokens": 4910.515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1432.4765625, + "completions/mean_terminated_length": 1283.07763671875, + "completions/min_length": 129.0, + "completions/min_terminated_length": 129.0, + "entropy": 0.2341524614021182, + "epoch": 0.09457473320978976, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1132907047867775, + "learning_rate": 1e-06, + "loss": 0.0282, + "num_tokens": 241101166.0, + "reward": 0.640625, + "reward_std": 0.1433543860912323, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 555, + "tools/generated_tokens": 3984.484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1259.16015625, + "completions/mean_terminated_length": 1062.9268798828125, + "completions/min_length": 162.0, + "completions/min_terminated_length": 162.0, + "entropy": 0.25118235033005476, + "epoch": 0.09474513813449209, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18437190353870392, + "learning_rate": 1e-06, + "loss": 0.0122, + "num_tokens": 241501047.0, + "reward": 0.671875, + "reward_std": 0.264829158782959, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 556, + "tools/generated_tokens": 4419.18359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1393.91796875, + "completions/mean_terminated_length": 1193.688720703125, + "completions/min_length": 370.0, + "completions/min_terminated_length": 370.0, + "entropy": 0.27406329568475485, + "epoch": 0.0949155430591944, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1724853217601776, + "learning_rate": 1e-06, + "loss": 0.0252, + "num_tokens": 241932562.0, + "reward": 0.51171875, + "reward_std": 0.22209002077579498, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 557, + "tools/generated_tokens": 4561.91796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1309.20703125, + "completions/mean_terminated_length": 1151.6492919921875, + "completions/min_length": 54.0, + "completions/min_terminated_length": 54.0, + "entropy": 0.24596633110195398, + "epoch": 0.09508594798389673, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16630980372428894, + "learning_rate": 1e-06, + "loss": -0.0091, + "num_tokens": 242344311.0, + "reward": 0.671875, + "reward_std": 0.1959541141986847, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 558, + "tools/generated_tokens": 3941.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2026.0, + "completions/mean_length": 1327.453125, + "completions/mean_terminated_length": 1116.388916015625, + "completions/min_length": 85.0, + "completions/min_terminated_length": 85.0, + "entropy": 0.2647271901369095, + "epoch": 0.09525635290859906, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.26405781507492065, + "learning_rate": 1e-06, + "loss": 0.0272, + "num_tokens": 242770811.0, + "reward": 0.609375, + "reward_std": 0.27398645877838135, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 559, + "tools/generated_tokens": 4615.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1428.12890625, + "completions/mean_terminated_length": 1250.6180419921875, + "completions/min_length": 283.0, + "completions/min_terminated_length": 283.0, + "entropy": 0.2787305386736989, + "epoch": 0.09542675783330139, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15779337286949158, + "learning_rate": 1e-06, + "loss": 0.0039, + "num_tokens": 243219868.0, + "reward": 0.34375, + "reward_std": 0.2779267430305481, + "rewards/simpleverify_reward/mean": 0.34375, + "rewards/simpleverify_reward/std": 0.47588926553726196, + "step": 560, + "tools/generated_tokens": 5172.1796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1295.26171875, + "completions/mean_terminated_length": 1139.04248046875, + "completions/min_length": 194.0, + "completions/min_terminated_length": 194.0, + "entropy": 0.23139166831970215, + "epoch": 0.0955971627580037, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12333023548126221, + "learning_rate": 1e-06, + "loss": 0.0263, + "num_tokens": 243627071.0, + "reward": 0.7578125, + "reward_std": 0.17978152632713318, + "rewards/simpleverify_reward/mean": 0.7578125, + "rewards/simpleverify_reward/std": 0.4292463958263397, + "step": 561, + "tools/generated_tokens": 3959.2890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1309.5, + "completions/mean_terminated_length": 1164.57470703125, + "completions/min_length": 95.0, + "completions/min_terminated_length": 95.0, + "entropy": 0.28400498628616333, + "epoch": 0.09576756768270603, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18991196155548096, + "learning_rate": 1e-06, + "loss": 0.0227, + "num_tokens": 244047359.0, + "reward": 0.63671875, + "reward_std": 0.272707998752594, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 562, + "tools/generated_tokens": 4157.51953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1264.51953125, + "completions/mean_terminated_length": 1140.4434814453125, + "completions/min_length": 136.0, + "completions/min_terminated_length": 136.0, + "entropy": 0.27960452903062105, + "epoch": 0.09593797260740836, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14789274334907532, + "learning_rate": 1e-06, + "loss": -0.0177, + "num_tokens": 244446196.0, + "reward": 0.546875, + "reward_std": 0.19343584775924683, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 563, + "tools/generated_tokens": 4008.53125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.33984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1345.1953125, + "completions/mean_terminated_length": 1265.747802734375, + "completions/min_length": 361.0, + "completions/min_terminated_length": 361.0, + "entropy": 0.29170692525804043, + "epoch": 0.09610837753211068, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1148810014128685, + "learning_rate": 1e-06, + "loss": 0.0011, + "num_tokens": 244868278.0, + "reward": 0.42578125, + "reward_std": 0.14161168038845062, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 564, + "tools/generated_tokens": 4537.203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1373.40625, + "completions/mean_terminated_length": 1283.86279296875, + "completions/min_length": 69.0, + "completions/min_terminated_length": 69.0, + "entropy": 0.2389817675575614, + "epoch": 0.096278782456813, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1451931744813919, + "learning_rate": 1e-06, + "loss": -0.001, + "num_tokens": 245283166.0, + "reward": 0.68359375, + "reward_std": 0.25705814361572266, + "rewards/simpleverify_reward/mean": 0.68359375, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 565, + "tools/generated_tokens": 3573.41796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.07421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1485.703125, + "completions/mean_terminated_length": 1191.1905517578125, + "completions/min_length": 374.0, + "completions/min_terminated_length": 374.0, + "entropy": 0.2955512637272477, + "epoch": 0.09644918738151533, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1512872725725174, + "learning_rate": 1e-06, + "loss": 0.0033, + "num_tokens": 245744994.0, + "reward": 0.33203125, + "reward_std": 0.1676161289215088, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 566, + "tools/generated_tokens": 5485.7265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1331.4609375, + "completions/mean_terminated_length": 1202.6866455078125, + "completions/min_length": 268.0, + "completions/min_terminated_length": 268.0, + "entropy": 0.26168250665068626, + "epoch": 0.09661959230621765, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16792535781860352, + "learning_rate": 1e-06, + "loss": 0.0394, + "num_tokens": 246161976.0, + "reward": 0.66015625, + "reward_std": 0.27168312668800354, + "rewards/simpleverify_reward/mean": 0.66015625, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 567, + "tools/generated_tokens": 4283.4921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1477.09375, + "completions/mean_terminated_length": 1286.8021240234375, + "completions/min_length": 408.0, + "completions/min_terminated_length": 408.0, + "entropy": 0.2853711638599634, + "epoch": 0.09678999723091998, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17652322351932526, + "learning_rate": 1e-06, + "loss": 0.0291, + "num_tokens": 246615088.0, + "reward": 0.51171875, + "reward_std": 0.3131811320781708, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 568, + "tools/generated_tokens": 4989.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.34375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1488.10546875, + "completions/mean_terminated_length": 1194.83935546875, + "completions/min_length": 231.0, + "completions/min_terminated_length": 231.0, + "entropy": 0.26467016711831093, + "epoch": 0.0969604021556223, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1525377780199051, + "learning_rate": 1e-06, + "loss": 0.005, + "num_tokens": 247072363.0, + "reward": 0.49609375, + "reward_std": 0.20377904176712036, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 569, + "tools/generated_tokens": 4888.11328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1282.50390625, + "completions/mean_terminated_length": 1140.75, + "completions/min_length": 298.0, + "completions/min_terminated_length": 298.0, + "entropy": 0.2252415968105197, + "epoch": 0.09713080708032462, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1339685469865799, + "learning_rate": 1e-06, + "loss": -0.0123, + "num_tokens": 247492268.0, + "reward": 0.28515625, + "reward_std": 0.2525572180747986, + "rewards/simpleverify_reward/mean": 0.28515625, + "rewards/simpleverify_reward/std": 0.4523732364177704, + "step": 570, + "tools/generated_tokens": 4410.5078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.52734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2006.0, + "completions/mean_length": 1325.890625, + "completions/mean_terminated_length": 1240.7510986328125, + "completions/min_length": 191.0, + "completions/min_terminated_length": 191.0, + "entropy": 0.25900744181126356, + "epoch": 0.09730121200502695, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18688349425792694, + "learning_rate": 1e-06, + "loss": 0.0256, + "num_tokens": 247899936.0, + "reward": 0.578125, + "reward_std": 0.311518132686615, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 571, + "tools/generated_tokens": 4149.90234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1395.62890625, + "completions/mean_terminated_length": 1204.5302734375, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.3108964003622532, + "epoch": 0.09747161692972926, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.3480401039123535, + "learning_rate": 1e-06, + "loss": -0.0001, + "num_tokens": 248347121.0, + "reward": 0.53515625, + "reward_std": 0.2348030060529709, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 572, + "tools/generated_tokens": 4563.63671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1355.421875, + "completions/mean_terminated_length": 1143.4080810546875, + "completions/min_length": 277.0, + "completions/min_terminated_length": 277.0, + "entropy": 0.3062203638255596, + "epoch": 0.09764202185443159, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13149289786815643, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 248771501.0, + "reward": 0.55078125, + "reward_std": 0.22996041178703308, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 573, + "tools/generated_tokens": 4691.41796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2021.0, + "completions/mean_length": 1373.05078125, + "completions/mean_terminated_length": 1152.7305908203125, + "completions/min_length": 287.0, + "completions/min_terminated_length": 287.0, + "entropy": 0.2653664303943515, + "epoch": 0.09781242677913392, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.1707451492547989, + "learning_rate": 1e-06, + "loss": 0.0444, + "num_tokens": 249209626.0, + "reward": 0.61328125, + "reward_std": 0.3082984387874603, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 574, + "tools/generated_tokens": 5197.05078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.35546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1486.21484375, + "completions/mean_terminated_length": 1176.39990234375, + "completions/min_length": 194.0, + "completions/min_terminated_length": 194.0, + "entropy": 0.25862658116966486, + "epoch": 0.09798283170383625, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15441524982452393, + "learning_rate": 1e-06, + "loss": 0.0235, + "num_tokens": 249675457.0, + "reward": 0.4609375, + "reward_std": 0.25491246581077576, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 575, + "tools/generated_tokens": 5558.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.98828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1417.859375, + "completions/mean_terminated_length": 1237.3668212890625, + "completions/min_length": 416.0, + "completions/min_terminated_length": 416.0, + "entropy": 0.26523235253989697, + "epoch": 0.09815323662853856, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17353393137454987, + "learning_rate": 1e-06, + "loss": 0.0453, + "num_tokens": 250119933.0, + "reward": 0.47265625, + "reward_std": 0.2930987477302551, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 576, + "tools/generated_tokens": 5121.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1413.68359375, + "completions/mean_terminated_length": 1140.8323974609375, + "completions/min_length": 154.0, + "completions/min_terminated_length": 154.0, + "entropy": 0.29382974095642567, + "epoch": 0.09832364155324089, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1510627418756485, + "learning_rate": 1e-06, + "loss": 0.0405, + "num_tokens": 250565788.0, + "reward": 0.39453125, + "reward_std": 0.19226783514022827, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 577, + "tools/generated_tokens": 5093.7421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1327.44921875, + "completions/mean_terminated_length": 1121.0703125, + "completions/min_length": 194.0, + "completions/min_terminated_length": 194.0, + "entropy": 0.24083214346319437, + "epoch": 0.09849404647794321, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16204893589019775, + "learning_rate": 1e-06, + "loss": 0.0186, + "num_tokens": 250983343.0, + "reward": 0.64453125, + "reward_std": 0.25978732109069824, + "rewards/simpleverify_reward/mean": 0.64453125, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 578, + "tools/generated_tokens": 4263.46484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1393.5390625, + "completions/mean_terminated_length": 1170.8272705078125, + "completions/min_length": 281.0, + "completions/min_terminated_length": 281.0, + "entropy": 0.2650506068021059, + "epoch": 0.09866445140264554, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1399562507867813, + "learning_rate": 1e-06, + "loss": 0.0208, + "num_tokens": 251413289.0, + "reward": 0.36328125, + "reward_std": 0.2348029911518097, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 579, + "tools/generated_tokens": 4833.54296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1326.2890625, + "completions/mean_terminated_length": 1172.37451171875, + "completions/min_length": 231.0, + "completions/min_terminated_length": 231.0, + "entropy": 0.2585566472262144, + "epoch": 0.09883485632734786, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16252174973487854, + "learning_rate": 1e-06, + "loss": 0.0422, + "num_tokens": 251837987.0, + "reward": 0.70703125, + "reward_std": 0.2803495228290558, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 580, + "tools/generated_tokens": 4486.29296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1351.0078125, + "completions/mean_terminated_length": 1118.682373046875, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.27031111624091864, + "epoch": 0.09900526125205018, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1783943921327591, + "learning_rate": 1e-06, + "loss": 0.0044, + "num_tokens": 252273317.0, + "reward": 0.51953125, + "reward_std": 0.31449854373931885, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 581, + "tools/generated_tokens": 5079.02734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1314.05859375, + "completions/mean_terminated_length": 1064.2984619140625, + "completions/min_length": 170.0, + "completions/min_terminated_length": 170.0, + "entropy": 0.27539417054504156, + "epoch": 0.09917566617675251, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14385437965393066, + "learning_rate": 1e-06, + "loss": 0.0234, + "num_tokens": 252692804.0, + "reward": 0.421875, + "reward_std": 0.18023642897605896, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 582, + "tools/generated_tokens": 4722.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1354.72265625, + "completions/mean_terminated_length": 1142.5, + "completions/min_length": 211.0, + "completions/min_terminated_length": 211.0, + "entropy": 0.2697906754910946, + "epoch": 0.09934607110145484, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11965537816286087, + "learning_rate": 1e-06, + "loss": 0.0106, + "num_tokens": 253122493.0, + "reward": 0.49609375, + "reward_std": 0.19038984179496765, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 583, + "tools/generated_tokens": 5066.7421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1370.55078125, + "completions/mean_terminated_length": 1154.04638671875, + "completions/min_length": 40.0, + "completions/min_terminated_length": 40.0, + "entropy": 0.24087819084525108, + "epoch": 0.09951647602615715, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13983558118343353, + "learning_rate": 1e-06, + "loss": 0.0118, + "num_tokens": 253552842.0, + "reward": 0.63671875, + "reward_std": 0.17177122831344604, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 584, + "tools/generated_tokens": 4498.546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.52734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1396.62109375, + "completions/mean_terminated_length": 1242.429931640625, + "completions/min_length": 219.0, + "completions/min_terminated_length": 219.0, + "entropy": 0.2532341908663511, + "epoch": 0.09968688095085948, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1424928903579712, + "learning_rate": 1e-06, + "loss": 0.028, + "num_tokens": 253988489.0, + "reward": 0.578125, + "reward_std": 0.20938239991664886, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 585, + "tools/generated_tokens": 4484.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1379.99609375, + "completions/mean_terminated_length": 1157.3333740234375, + "completions/min_length": 206.0, + "completions/min_terminated_length": 206.0, + "entropy": 0.2904137782752514, + "epoch": 0.09985728587556181, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1823052614927292, + "learning_rate": 1e-06, + "loss": 0.0017, + "num_tokens": 254409576.0, + "reward": 0.62109375, + "reward_std": 0.2342825084924698, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 586, + "tools/generated_tokens": 4236.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1428.69140625, + "completions/mean_terminated_length": 1181.644775390625, + "completions/min_length": 130.0, + "completions/min_terminated_length": 130.0, + "entropy": 0.2986216712743044, + "epoch": 0.10002769080026412, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12949174642562866, + "learning_rate": 1e-06, + "loss": 0.0378, + "num_tokens": 254860249.0, + "reward": 0.49609375, + "reward_std": 0.18408125638961792, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 587, + "tools/generated_tokens": 5332.73046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.90625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1283.09765625, + "completions/mean_terminated_length": 1192.9127197265625, + "completions/min_length": 42.0, + "completions/min_terminated_length": 42.0, + "entropy": 0.2757903980091214, + "epoch": 0.10019809572496645, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14384324848651886, + "learning_rate": 1e-06, + "loss": 0.005, + "num_tokens": 255253570.0, + "reward": 0.57421875, + "reward_std": 0.22808241844177246, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 588, + "tools/generated_tokens": 3539.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1481.046875, + "completions/mean_terminated_length": 1204.18017578125, + "completions/min_length": 168.0, + "completions/min_terminated_length": 168.0, + "entropy": 0.28604976274073124, + "epoch": 0.10036850064966878, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.23569124937057495, + "learning_rate": 1e-06, + "loss": 0.043, + "num_tokens": 255712222.0, + "reward": 0.546875, + "reward_std": 0.29567813873291016, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 589, + "tools/generated_tokens": 5297.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.86328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2000.0, + "completions/mean_length": 1219.1171875, + "completions/mean_terminated_length": 1012.9121704101562, + "completions/min_length": 168.0, + "completions/min_terminated_length": 168.0, + "entropy": 0.28153133019804955, + "epoch": 0.1005389055743711, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16634711623191833, + "learning_rate": 1e-06, + "loss": 0.01, + "num_tokens": 256111612.0, + "reward": 0.5625, + "reward_std": 0.2226376235485077, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 590, + "tools/generated_tokens": 4539.125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1331.47265625, + "completions/mean_terminated_length": 1072.3138427734375, + "completions/min_length": 173.0, + "completions/min_terminated_length": 173.0, + "entropy": 0.2969023184850812, + "epoch": 0.10070931049907342, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15822678804397583, + "learning_rate": 1e-06, + "loss": 0.03, + "num_tokens": 256538581.0, + "reward": 0.40625, + "reward_std": 0.2191779911518097, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 591, + "tools/generated_tokens": 4739.4921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1235.59375, + "completions/mean_terminated_length": 1106.94580078125, + "completions/min_length": 97.0, + "completions/min_terminated_length": 97.0, + "entropy": 0.2496339399367571, + "epoch": 0.10087971542377575, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13325046002864838, + "learning_rate": 1e-06, + "loss": 0.0132, + "num_tokens": 256934813.0, + "reward": 0.4140625, + "reward_std": 0.2615154981613159, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 592, + "tools/generated_tokens": 4283.61328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1467.3203125, + "completions/mean_terminated_length": 1222.14453125, + "completions/min_length": 218.0, + "completions/min_terminated_length": 218.0, + "entropy": 0.2549753934144974, + "epoch": 0.10105012034847807, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.13179758191108704, + "learning_rate": 1e-06, + "loss": -0.0164, + "num_tokens": 257393599.0, + "reward": 0.5625, + "reward_std": 0.25197336077690125, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 593, + "tools/generated_tokens": 4995.3203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1316.12890625, + "completions/mean_terminated_length": 1168.3802490234375, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.26244362629950047, + "epoch": 0.1012205252731804, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.19303129613399506, + "learning_rate": 1e-06, + "loss": 0.0399, + "num_tokens": 257816256.0, + "reward": 0.625, + "reward_std": 0.3593369722366333, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 594, + "tools/generated_tokens": 4332.1484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1395.97265625, + "completions/mean_terminated_length": 1221.6683349609375, + "completions/min_length": 105.0, + "completions/min_terminated_length": 105.0, + "entropy": 0.2524370811879635, + "epoch": 0.10139093019788271, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15308091044425964, + "learning_rate": 1e-06, + "loss": 0.027, + "num_tokens": 258260297.0, + "reward": 0.515625, + "reward_std": 0.24872365593910217, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 595, + "tools/generated_tokens": 5123.98828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1274.6171875, + "completions/mean_terminated_length": 1077.4853515625, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.2460261918604374, + "epoch": 0.10156133512258504, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.11448148638010025, + "learning_rate": 1e-06, + "loss": 0.0518, + "num_tokens": 258672919.0, + "reward": 0.4453125, + "reward_std": 0.1364503651857376, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 596, + "tools/generated_tokens": 4514.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1299.58984375, + "completions/mean_terminated_length": 1094.801025390625, + "completions/min_length": 230.0, + "completions/min_terminated_length": 230.0, + "entropy": 0.28133365977555513, + "epoch": 0.10173174004728737, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12616626918315887, + "learning_rate": 1e-06, + "loss": 0.0215, + "num_tokens": 259080366.0, + "reward": 0.484375, + "reward_std": 0.13896197080612183, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 597, + "tools/generated_tokens": 4363.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1217.3671875, + "completions/mean_terminated_length": 1158.2845458984375, + "completions/min_length": 214.0, + "completions/min_terminated_length": 214.0, + "entropy": 0.24622021056711674, + "epoch": 0.1019021449719897, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16811169683933258, + "learning_rate": 1e-06, + "loss": 0.0237, + "num_tokens": 259475324.0, + "reward": 0.69921875, + "reward_std": 0.2175418734550476, + "rewards/simpleverify_reward/mean": 0.69921875, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 598, + "tools/generated_tokens": 3745.37109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1126.17578125, + "completions/mean_terminated_length": 994.4866333007812, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.25988560542464256, + "epoch": 0.10207254989669201, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21057769656181335, + "learning_rate": 1e-06, + "loss": 0.0157, + "num_tokens": 259847241.0, + "reward": 0.6015625, + "reward_std": 0.26239442825317383, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 599, + "tools/generated_tokens": 3814.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1431.1484375, + "completions/mean_terminated_length": 1199.00537109375, + "completions/min_length": 280.0, + "completions/min_terminated_length": 280.0, + "entropy": 0.23735546227544546, + "epoch": 0.10224295482139434, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15083478391170502, + "learning_rate": 1e-06, + "loss": 0.0431, + "num_tokens": 260298815.0, + "reward": 0.515625, + "reward_std": 0.31496453285217285, + "rewards/simpleverify_reward/mean": 0.515625, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 600, + "tools/generated_tokens": 5311.1484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.89453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1336.73828125, + "completions/mean_terminated_length": 1119.0101318359375, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "entropy": 0.28465794399380684, + "epoch": 0.10241335974609667, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1729150116443634, + "learning_rate": 1e-06, + "loss": 0.0123, + "num_tokens": 260723436.0, + "reward": 0.51953125, + "reward_std": 0.25559213757514954, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 601, + "tools/generated_tokens": 4544.7578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1504.16015625, + "completions/mean_terminated_length": 1238.5814208984375, + "completions/min_length": 259.0, + "completions/min_terminated_length": 259.0, + "entropy": 0.2872716346755624, + "epoch": 0.10258376467079898, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13313445448875427, + "learning_rate": 1e-06, + "loss": 0.0354, + "num_tokens": 261199509.0, + "reward": 0.2421875, + "reward_std": 0.20465734601020813, + "rewards/simpleverify_reward/mean": 0.2421875, + "rewards/simpleverify_reward/std": 0.4292463958263397, + "step": 602, + "tools/generated_tokens": 5704.18359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.05078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1308.875, + "completions/mean_terminated_length": 1106.6268310546875, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.3057608436793089, + "epoch": 0.10275416959550131, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1824343204498291, + "learning_rate": 1e-06, + "loss": 0.0293, + "num_tokens": 261613621.0, + "reward": 0.578125, + "reward_std": 0.2079564929008484, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 603, + "tools/generated_tokens": 4460.8828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1194.90234375, + "completions/mean_terminated_length": 1110.6995849609375, + "completions/min_length": 128.0, + "completions/min_terminated_length": 128.0, + "entropy": 0.2693713651970029, + "epoch": 0.10292457452020363, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17988616228103638, + "learning_rate": 1e-06, + "loss": 0.0239, + "num_tokens": 261994300.0, + "reward": 0.578125, + "reward_std": 0.2487104833126068, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 604, + "tools/generated_tokens": 3602.91015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.17578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1287.71875, + "completions/mean_terminated_length": 1116.7607421875, + "completions/min_length": 251.0, + "completions/min_terminated_length": 251.0, + "entropy": 0.27751616202294827, + "epoch": 0.10309497944490596, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14557887613773346, + "learning_rate": 1e-06, + "loss": 0.0268, + "num_tokens": 262404740.0, + "reward": 0.59375, + "reward_std": 0.1849614679813385, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 605, + "tools/generated_tokens": 4519.73046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1460.26171875, + "completions/mean_terminated_length": 1225.819580078125, + "completions/min_length": 290.0, + "completions/min_terminated_length": 290.0, + "entropy": 0.25700395181775093, + "epoch": 0.10326538436960828, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.135623499751091, + "learning_rate": 1e-06, + "loss": 0.017, + "num_tokens": 262861479.0, + "reward": 0.41015625, + "reward_std": 0.2205064743757248, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 606, + "tools/generated_tokens": 4972.2734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1311.9765625, + "completions/mean_terminated_length": 1101.1708984375, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.2471226779744029, + "epoch": 0.1034357892943106, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12482727319002151, + "learning_rate": 1e-06, + "loss": 0.0129, + "num_tokens": 263274897.0, + "reward": 0.47265625, + "reward_std": 0.14502215385437012, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 607, + "tools/generated_tokens": 4319.9921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1299.6796875, + "completions/mean_terminated_length": 1075.5634765625, + "completions/min_length": 124.0, + "completions/min_terminated_length": 124.0, + "entropy": 0.26958257611840963, + "epoch": 0.10360619421901293, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17142032086849213, + "learning_rate": 1e-06, + "loss": 0.0147, + "num_tokens": 263694703.0, + "reward": 0.421875, + "reward_std": 0.24933947622776031, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 608, + "tools/generated_tokens": 4915.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1339.625, + "completions/mean_terminated_length": 1127.4771728515625, + "completions/min_length": 326.0, + "completions/min_terminated_length": 326.0, + "entropy": 0.3016281109303236, + "epoch": 0.10377659914371526, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.2015984207391739, + "learning_rate": 1e-06, + "loss": 0.0301, + "num_tokens": 264127599.0, + "reward": 0.51171875, + "reward_std": 0.36788105964660645, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 609, + "tools/generated_tokens": 4923.6328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.39453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1563.81640625, + "completions/mean_terminated_length": 1248.3289794921875, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.309479346498847, + "epoch": 0.10394700406841757, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.138493612408638, + "learning_rate": 1e-06, + "loss": 0.0069, + "num_tokens": 264604912.0, + "reward": 0.328125, + "reward_std": 0.15284234285354614, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 610, + "tools/generated_tokens": 5363.828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.85546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1327.5859375, + "completions/mean_terminated_length": 1152.7379150390625, + "completions/min_length": 95.0, + "completions/min_terminated_length": 95.0, + "entropy": 0.2610814590007067, + "epoch": 0.1041174089931199, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15999998152256012, + "learning_rate": 1e-06, + "loss": 0.034, + "num_tokens": 265025798.0, + "reward": 0.453125, + "reward_std": 0.23755928874015808, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 611, + "tools/generated_tokens": 4703.58984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1418.15625, + "completions/mean_terminated_length": 1137.0509033203125, + "completions/min_length": 198.0, + "completions/min_terminated_length": 198.0, + "entropy": 0.2829169724136591, + "epoch": 0.10428781391782223, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17679709196090698, + "learning_rate": 1e-06, + "loss": 0.0353, + "num_tokens": 265469566.0, + "reward": 0.375, + "reward_std": 0.2794036865234375, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 612, + "tools/generated_tokens": 5362.171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.92578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1340.7109375, + "completions/mean_terminated_length": 1173.294677734375, + "completions/min_length": 205.0, + "completions/min_terminated_length": 205.0, + "entropy": 0.26936994958668947, + "epoch": 0.10445821884252456, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1563178300857544, + "learning_rate": 1e-06, + "loss": 0.0342, + "num_tokens": 265894980.0, + "reward": 0.57421875, + "reward_std": 0.21658216416835785, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 613, + "tools/generated_tokens": 4540.72265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2005.0, + "completions/mean_length": 1345.85546875, + "completions/mean_terminated_length": 1238.31982421875, + "completions/min_length": 247.0, + "completions/min_terminated_length": 247.0, + "entropy": 0.2717377059161663, + "epoch": 0.10462862376722687, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18921178579330444, + "learning_rate": 1e-06, + "loss": 0.0413, + "num_tokens": 266306623.0, + "reward": 0.71875, + "reward_std": 0.2729611396789551, + "rewards/simpleverify_reward/mean": 0.71875, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 614, + "tools/generated_tokens": 4025.85546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1265.1484375, + "completions/mean_terminated_length": 1079.840576171875, + "completions/min_length": 103.0, + "completions/min_terminated_length": 103.0, + "entropy": 0.2637931974604726, + "epoch": 0.1047990286919292, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1937050074338913, + "learning_rate": 1e-06, + "loss": 0.026, + "num_tokens": 266707573.0, + "reward": 0.54296875, + "reward_std": 0.22175738215446472, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 615, + "tools/generated_tokens": 4193.15234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1381.265625, + "completions/mean_terminated_length": 1089.10107421875, + "completions/min_length": 270.0, + "completions/min_terminated_length": 270.0, + "entropy": 0.31916841957718134, + "epoch": 0.10496943361663152, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1596374660730362, + "learning_rate": 1e-06, + "loss": 0.0115, + "num_tokens": 267149529.0, + "reward": 0.26171875, + "reward_std": 0.2327008694410324, + "rewards/simpleverify_reward/mean": 0.26171875, + "rewards/simpleverify_reward/std": 0.4404313564300537, + "step": 616, + "tools/generated_tokens": 5253.26953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1441.234375, + "completions/mean_terminated_length": 1279.0445556640625, + "completions/min_length": 81.0, + "completions/min_terminated_length": 81.0, + "entropy": 0.28609442338347435, + "epoch": 0.10513983854133384, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.22561423480510712, + "learning_rate": 1e-06, + "loss": 0.0367, + "num_tokens": 267589349.0, + "reward": 0.6796875, + "reward_std": 0.18386822938919067, + "rewards/simpleverify_reward/mean": 0.6796875, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 617, + "tools/generated_tokens": 4337.2578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1442.87109375, + "completions/mean_terminated_length": 1236.9423828125, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.2818992603570223, + "epoch": 0.10531024346603617, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.12229091674089432, + "learning_rate": 1e-06, + "loss": -0.0134, + "num_tokens": 268048260.0, + "reward": 0.46875, + "reward_std": 0.19918768107891083, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 618, + "tools/generated_tokens": 5098.8828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.78515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1303.8515625, + "completions/mean_terminated_length": 1157.83642578125, + "completions/min_length": 71.0, + "completions/min_terminated_length": 71.0, + "entropy": 0.2666237447410822, + "epoch": 0.1054806483907385, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15779152512550354, + "learning_rate": 1e-06, + "loss": 0.0173, + "num_tokens": 268457326.0, + "reward": 0.625, + "reward_std": 0.2566280663013458, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 619, + "tools/generated_tokens": 4231.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1398.3671875, + "completions/mean_terminated_length": 1199.5101318359375, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.28670331183820963, + "epoch": 0.10565105331544082, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18112021684646606, + "learning_rate": 1e-06, + "loss": 0.0224, + "num_tokens": 268896060.0, + "reward": 0.50390625, + "reward_std": 0.31625896692276, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 620, + "tools/generated_tokens": 4774.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1314.05078125, + "completions/mean_terminated_length": 1161.7264404296875, + "completions/min_length": 228.0, + "completions/min_terminated_length": 228.0, + "entropy": 0.30507533717900515, + "epoch": 0.10582145824014313, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.1667071133852005, + "learning_rate": 1e-06, + "loss": 0.0215, + "num_tokens": 269318681.0, + "reward": 0.546875, + "reward_std": 0.2688092887401581, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 621, + "tools/generated_tokens": 4810.0625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.70703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1413.1328125, + "completions/mean_terminated_length": 1284.9765625, + "completions/min_length": 171.0, + "completions/min_terminated_length": 171.0, + "entropy": 0.23536482453346252, + "epoch": 0.10599186316484546, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14880546927452087, + "learning_rate": 1e-06, + "loss": 0.0118, + "num_tokens": 269755499.0, + "reward": 0.4921875, + "reward_std": 0.21251130104064941, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 622, + "tools/generated_tokens": 4341.1484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1376.671875, + "completions/mean_terminated_length": 1248.651123046875, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.2502811774611473, + "epoch": 0.10616226808954779, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1514146625995636, + "learning_rate": 1e-06, + "loss": 0.0182, + "num_tokens": 270193735.0, + "reward": 0.51953125, + "reward_std": 0.24031278491020203, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 623, + "tools/generated_tokens": 4480.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1443.97265625, + "completions/mean_terminated_length": 1188.93896484375, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.31538047548383474, + "epoch": 0.10633267301425012, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16263020038604736, + "learning_rate": 1e-06, + "loss": -0.0018, + "num_tokens": 270643968.0, + "reward": 0.38671875, + "reward_std": 0.18760645389556885, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 624, + "tools/generated_tokens": 5195.9765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.83203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1451.19140625, + "completions/mean_terminated_length": 1327.325439453125, + "completions/min_length": 292.0, + "completions/min_terminated_length": 292.0, + "entropy": 0.24140130449086428, + "epoch": 0.10650307793895243, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1506175845861435, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 271075569.0, + "reward": 0.59765625, + "reward_std": 0.26320207118988037, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 625, + "tools/generated_tokens": 3891.1953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1379.875, + "completions/mean_terminated_length": 1213.6732177734375, + "completions/min_length": 152.0, + "completions/min_terminated_length": 152.0, + "entropy": 0.2595217255875468, + "epoch": 0.10667348286365476, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.09836837649345398, + "learning_rate": 1e-06, + "loss": -0.0036, + "num_tokens": 271511185.0, + "reward": 0.38671875, + "reward_std": 0.12742365896701813, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 626, + "tools/generated_tokens": 4803.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1334.44921875, + "completions/mean_terminated_length": 1156.9366455078125, + "completions/min_length": 113.0, + "completions/min_terminated_length": 113.0, + "entropy": 0.2846803767606616, + "epoch": 0.10684388778835709, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20360346138477325, + "learning_rate": 1e-06, + "loss": 0.0557, + "num_tokens": 271942484.0, + "reward": 0.52734375, + "reward_std": 0.25221139192581177, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 627, + "tools/generated_tokens": 4574.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1323.11328125, + "completions/mean_terminated_length": 1147.1795654296875, + "completions/min_length": 226.0, + "completions/min_terminated_length": 226.0, + "entropy": 0.28598304837942123, + "epoch": 0.10701429271305941, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13000962138175964, + "learning_rate": 1e-06, + "loss": 0.0366, + "num_tokens": 272365921.0, + "reward": 0.55859375, + "reward_std": 0.22273029386997223, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 628, + "tools/generated_tokens": 4563.12109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1264.2421875, + "completions/mean_terminated_length": 1140.11767578125, + "completions/min_length": 208.0, + "completions/min_terminated_length": 208.0, + "entropy": 0.26456060726195574, + "epoch": 0.10718469763776173, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18791763484477997, + "learning_rate": 1e-06, + "loss": 0.0556, + "num_tokens": 272782175.0, + "reward": 0.69140625, + "reward_std": 0.29092884063720703, + "rewards/simpleverify_reward/mean": 0.69140625, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 629, + "tools/generated_tokens": 4424.24609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1994.0, + "completions/mean_length": 1219.8359375, + "completions/mean_terminated_length": 1088.6788330078125, + "completions/min_length": 67.0, + "completions/min_terminated_length": 67.0, + "entropy": 0.2776689175516367, + "epoch": 0.10735510256246406, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18204987049102783, + "learning_rate": 1e-06, + "loss": 0.0126, + "num_tokens": 273177717.0, + "reward": 0.484375, + "reward_std": 0.2574812173843384, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 630, + "tools/generated_tokens": 4147.8359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1435.2578125, + "completions/mean_terminated_length": 1255.7677001953125, + "completions/min_length": 221.0, + "completions/min_terminated_length": 221.0, + "entropy": 0.23397820256650448, + "epoch": 0.10752550748716638, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13757413625717163, + "learning_rate": 1e-06, + "loss": 0.0312, + "num_tokens": 273619207.0, + "reward": 0.42578125, + "reward_std": 0.2495477795600891, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 631, + "tools/generated_tokens": 4547.24609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1362.64453125, + "completions/mean_terminated_length": 1261.228759765625, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "entropy": 0.21338557358831167, + "epoch": 0.1076959124118687, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.13186493515968323, + "learning_rate": 1e-06, + "loss": 0.0135, + "num_tokens": 274036140.0, + "reward": 0.70703125, + "reward_std": 0.1438203752040863, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 632, + "tools/generated_tokens": 3578.65625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.08203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1378.7265625, + "completions/mean_terminated_length": 1164.8349609375, + "completions/min_length": 271.0, + "completions/min_terminated_length": 271.0, + "entropy": 0.2828444391489029, + "epoch": 0.10786631733657102, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1869775801897049, + "learning_rate": 1e-06, + "loss": 0.0254, + "num_tokens": 274472630.0, + "reward": 0.40625, + "reward_std": 0.2245136797428131, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 633, + "tools/generated_tokens": 4930.73046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1325.94140625, + "completions/mean_terminated_length": 1114.4293212890625, + "completions/min_length": 228.0, + "completions/min_terminated_length": 228.0, + "entropy": 0.32327297516167164, + "epoch": 0.10803672226127335, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2007516771554947, + "learning_rate": 1e-06, + "loss": 0.0409, + "num_tokens": 274901815.0, + "reward": 0.44140625, + "reward_std": 0.34267544746398926, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 634, + "tools/generated_tokens": 5349.953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.96484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1380.5078125, + "completions/mean_terminated_length": 1226.485595703125, + "completions/min_length": 345.0, + "completions/min_terminated_length": 345.0, + "entropy": 0.2673746030777693, + "epoch": 0.10820712718597568, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1623433232307434, + "learning_rate": 1e-06, + "loss": 0.0131, + "num_tokens": 275328969.0, + "reward": 0.39453125, + "reward_std": 0.18265536427497864, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 635, + "tools/generated_tokens": 4452.53125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1424.09375, + "completions/mean_terminated_length": 1145.6328125, + "completions/min_length": 90.0, + "completions/min_terminated_length": 90.0, + "entropy": 0.3063347237184644, + "epoch": 0.108377532110678, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.11974193900823593, + "learning_rate": 1e-06, + "loss": 0.0016, + "num_tokens": 275771329.0, + "reward": 0.3671875, + "reward_std": 0.2048833966255188, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 636, + "tools/generated_tokens": 5112.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1169.5, + "completions/mean_terminated_length": 1118.6776123046875, + "completions/min_length": 161.0, + "completions/min_terminated_length": 161.0, + "entropy": 0.27197619155049324, + "epoch": 0.10854793703538032, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18804891407489777, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 276148433.0, + "reward": 0.6484375, + "reward_std": 0.1624118983745575, + "rewards/simpleverify_reward/mean": 0.6484375, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 637, + "tools/generated_tokens": 3609.49609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.19140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1482.41796875, + "completions/mean_terminated_length": 1215.8792724609375, + "completions/min_length": 308.0, + "completions/min_terminated_length": 308.0, + "entropy": 0.2589757265523076, + "epoch": 0.10871834196008265, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.15880268812179565, + "learning_rate": 1e-06, + "loss": -0.0156, + "num_tokens": 276618396.0, + "reward": 0.4609375, + "reward_std": 0.13041725754737854, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 638, + "tools/generated_tokens": 5226.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1399.01953125, + "completions/mean_terminated_length": 1204.659912109375, + "completions/min_length": 144.0, + "completions/min_terminated_length": 144.0, + "entropy": 0.28624267783015966, + "epoch": 0.10888874688478498, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.171358123421669, + "learning_rate": 1e-06, + "loss": 0.0569, + "num_tokens": 277053121.0, + "reward": 0.52734375, + "reward_std": 0.29081130027770996, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 639, + "tools/generated_tokens": 4807.01953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1357.08203125, + "completions/mean_terminated_length": 1209.7298583984375, + "completions/min_length": 40.0, + "completions/min_terminated_length": 40.0, + "entropy": 0.2540802387520671, + "epoch": 0.10905915180948729, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14097262918949127, + "learning_rate": 1e-06, + "loss": 0.0199, + "num_tokens": 277480694.0, + "reward": 0.57421875, + "reward_std": 0.2199878990650177, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 640, + "tools/generated_tokens": 4229.15234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1367.78515625, + "completions/mean_terminated_length": 1121.7552490234375, + "completions/min_length": 209.0, + "completions/min_terminated_length": 209.0, + "entropy": 0.29023122135549784, + "epoch": 0.10922955673418962, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14141590893268585, + "learning_rate": 1e-06, + "loss": 0.0172, + "num_tokens": 277909999.0, + "reward": 0.5703125, + "reward_std": 0.16901493072509766, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 641, + "tools/generated_tokens": 4863.79296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.70703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1321.83984375, + "completions/mean_terminated_length": 1158.54541015625, + "completions/min_length": 244.0, + "completions/min_terminated_length": 244.0, + "entropy": 0.2669211020693183, + "epoch": 0.10939996165889194, + "frac_reward_zero_std": 0.0625, + "grad_norm": 0.1905251145362854, + "learning_rate": 1e-06, + "loss": 0.0207, + "num_tokens": 278335174.0, + "reward": 0.5859375, + "reward_std": 0.38027530908584595, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 642, + "tools/generated_tokens": 4841.84765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1393.015625, + "completions/mean_terminated_length": 1222.0196533203125, + "completions/min_length": 108.0, + "completions/min_terminated_length": 108.0, + "entropy": 0.28753375727683306, + "epoch": 0.10957036658359427, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1880209594964981, + "learning_rate": 1e-06, + "loss": 0.0206, + "num_tokens": 278784234.0, + "reward": 0.53515625, + "reward_std": 0.32094109058380127, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 643, + "tools/generated_tokens": 5273.03125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.89453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1283.921875, + "completions/mean_terminated_length": 1178.6533203125, + "completions/min_length": 240.0, + "completions/min_terminated_length": 240.0, + "entropy": 0.3149437680840492, + "epoch": 0.10974077150829659, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20286233723163605, + "learning_rate": 1e-06, + "loss": 0.0057, + "num_tokens": 279199222.0, + "reward": 0.3515625, + "reward_std": 0.2900395393371582, + "rewards/simpleverify_reward/mean": 0.3515625, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 644, + "tools/generated_tokens": 4731.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.68359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1315.46484375, + "completions/mean_terminated_length": 1128.7451171875, + "completions/min_length": 347.0, + "completions/min_terminated_length": 347.0, + "entropy": 0.23532930668443441, + "epoch": 0.10991117643299891, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.16920538246631622, + "learning_rate": 1e-06, + "loss": 0.0417, + "num_tokens": 279621165.0, + "reward": 0.5625, + "reward_std": 0.3102988600730896, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 645, + "tools/generated_tokens": 4867.48046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1283.63671875, + "completions/mean_terminated_length": 1189.767578125, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.2547568343579769, + "epoch": 0.11008158135770124, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.166712686419487, + "learning_rate": 1e-06, + "loss": -0.0026, + "num_tokens": 280022832.0, + "reward": 0.72265625, + "reward_std": 0.2599048614501953, + "rewards/simpleverify_reward/mean": 0.72265625, + "rewards/simpleverify_reward/std": 0.4485645890235901, + "step": 646, + "tools/generated_tokens": 3891.65234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1382.25390625, + "completions/mean_terminated_length": 1106.3978271484375, + "completions/min_length": 113.0, + "completions/min_terminated_length": 113.0, + "entropy": 0.2551482766866684, + "epoch": 0.11025198628240356, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1152617335319519, + "learning_rate": 1e-06, + "loss": 0.0169, + "num_tokens": 280451825.0, + "reward": 0.36328125, + "reward_std": 0.17473775148391724, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 647, + "tools/generated_tokens": 4974.265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1468.33203125, + "completions/mean_terminated_length": 1185.244140625, + "completions/min_length": 208.0, + "completions/min_terminated_length": 208.0, + "entropy": 0.29337296821177006, + "epoch": 0.11042239120710588, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15559512376785278, + "learning_rate": 1e-06, + "loss": 0.0193, + "num_tokens": 280907798.0, + "reward": 0.375, + "reward_std": 0.22383463382720947, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 648, + "tools/generated_tokens": 5316.328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2014.0, + "completions/mean_length": 1352.0234375, + "completions/mean_terminated_length": 1199.5810546875, + "completions/min_length": 252.0, + "completions/min_terminated_length": 252.0, + "entropy": 0.26699577923864126, + "epoch": 0.11059279613180821, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1800403594970703, + "learning_rate": 1e-06, + "loss": -0.0008, + "num_tokens": 281332316.0, + "reward": 0.37890625, + "reward_std": 0.29315072298049927, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 649, + "tools/generated_tokens": 4584.03125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1403.3671875, + "completions/mean_terminated_length": 1197.36083984375, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.3092813640832901, + "epoch": 0.11076320105651054, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.22655443847179413, + "learning_rate": 1e-06, + "loss": 0.007, + "num_tokens": 281785338.0, + "reward": 0.4140625, + "reward_std": 0.34595030546188354, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 650, + "tools/generated_tokens": 5331.41015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1372.65234375, + "completions/mean_terminated_length": 1243.8651123046875, + "completions/min_length": 120.0, + "completions/min_terminated_length": 120.0, + "entropy": 0.24570453632622957, + "epoch": 0.11093360598121285, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.19229960441589355, + "learning_rate": 1e-06, + "loss": 0.0379, + "num_tokens": 282206161.0, + "reward": 0.4296875, + "reward_std": 0.3614438474178314, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 651, + "tools/generated_tokens": 4068.65625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1399.1953125, + "completions/mean_terminated_length": 1271.869140625, + "completions/min_length": 242.0, + "completions/min_terminated_length": 242.0, + "entropy": 0.2655220804736018, + "epoch": 0.11110401090591518, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2009373903274536, + "learning_rate": 1e-06, + "loss": 0.0113, + "num_tokens": 282644275.0, + "reward": 0.671875, + "reward_std": 0.25789541006088257, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 652, + "tools/generated_tokens": 4471.20703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1279.20703125, + "completions/mean_terminated_length": 1157.4525146484375, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.2848825789988041, + "epoch": 0.1112744158306175, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.45934826135635376, + "learning_rate": 1e-06, + "loss": 0.0318, + "num_tokens": 283052248.0, + "reward": 0.62109375, + "reward_std": 0.2815985083580017, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 653, + "tools/generated_tokens": 4223.2109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1350.4375, + "completions/mean_terminated_length": 1146.1162109375, + "completions/min_length": 215.0, + "completions/min_terminated_length": 215.0, + "entropy": 0.2441366296261549, + "epoch": 0.11144482075531983, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.19023865461349487, + "learning_rate": 1e-06, + "loss": -0.0051, + "num_tokens": 283476136.0, + "reward": 0.6328125, + "reward_std": 0.25418204069137573, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 654, + "tools/generated_tokens": 4622.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1433.6328125, + "completions/mean_terminated_length": 1188.557373046875, + "completions/min_length": 62.0, + "completions/min_terminated_length": 62.0, + "entropy": 0.2219137530773878, + "epoch": 0.11161522568002215, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16890238225460052, + "learning_rate": 1e-06, + "loss": 0.037, + "num_tokens": 283929194.0, + "reward": 0.4375, + "reward_std": 0.31049323081970215, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 655, + "tools/generated_tokens": 5113.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1378.3828125, + "completions/mean_terminated_length": 1150.502685546875, + "completions/min_length": 182.0, + "completions/min_terminated_length": 182.0, + "entropy": 0.3080749027431011, + "epoch": 0.11178563060472448, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.19281238317489624, + "learning_rate": 1e-06, + "loss": 0.0428, + "num_tokens": 284374156.0, + "reward": 0.4765625, + "reward_std": 0.32010379433631897, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 656, + "tools/generated_tokens": 5298.38671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2011.0, + "completions/mean_length": 1335.77734375, + "completions/mean_terminated_length": 1029.4022216796875, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.28125489316880703, + "epoch": 0.1119560355294268, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14651690423488617, + "learning_rate": 1e-06, + "loss": 0.0199, + "num_tokens": 284806371.0, + "reward": 0.3125, + "reward_std": 0.16406384110450745, + "rewards/simpleverify_reward/mean": 0.3125, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 657, + "tools/generated_tokens": 5279.7890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.92578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1250.56640625, + "completions/mean_terminated_length": 1156.5458984375, + "completions/min_length": 236.0, + "completions/min_terminated_length": 236.0, + "entropy": 0.2504276493564248, + "epoch": 0.11212644045412913, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15595729649066925, + "learning_rate": 1e-06, + "loss": 0.0225, + "num_tokens": 285204836.0, + "reward": 0.57421875, + "reward_std": 0.2115791141986847, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 658, + "tools/generated_tokens": 3930.5703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1384.0, + "completions/mean_terminated_length": 1148.61376953125, + "completions/min_length": 196.0, + "completions/min_terminated_length": 196.0, + "entropy": 0.2641846025362611, + "epoch": 0.11229684537883144, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17719745635986328, + "learning_rate": 1e-06, + "loss": 0.052, + "num_tokens": 285653396.0, + "reward": 0.4140625, + "reward_std": 0.29392436146736145, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 659, + "tools/generated_tokens": 5304.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.32421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1478.26953125, + "completions/mean_terminated_length": 1204.9364013671875, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.2594412565231323, + "epoch": 0.11246725030353377, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1273634135723114, + "learning_rate": 1e-06, + "loss": 0.0096, + "num_tokens": 286108489.0, + "reward": 0.46484375, + "reward_std": 0.15309548377990723, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 660, + "tools/generated_tokens": 4774.2734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1349.515625, + "completions/mean_terminated_length": 1188.331787109375, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.26714857015758753, + "epoch": 0.1126376552282361, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18857212364673615, + "learning_rate": 1e-06, + "loss": 0.0122, + "num_tokens": 286532477.0, + "reward": 0.39453125, + "reward_std": 0.27077996730804443, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 661, + "tools/generated_tokens": 4765.5234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1258.6484375, + "completions/mean_terminated_length": 1137.7657470703125, + "completions/min_length": 189.0, + "completions/min_terminated_length": 189.0, + "entropy": 0.2908195350319147, + "epoch": 0.11280806015293841, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2098398655653, + "learning_rate": 1e-06, + "loss": 0.0147, + "num_tokens": 286939363.0, + "reward": 0.40234375, + "reward_std": 0.28387558460235596, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 662, + "tools/generated_tokens": 4466.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1212.98046875, + "completions/mean_terminated_length": 1122.61474609375, + "completions/min_length": 256.0, + "completions/min_terminated_length": 256.0, + "entropy": 0.2730645714327693, + "epoch": 0.11297846507764074, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18227653205394745, + "learning_rate": 1e-06, + "loss": 0.0048, + "num_tokens": 287322398.0, + "reward": 0.55859375, + "reward_std": 0.26917997002601624, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 663, + "tools/generated_tokens": 3732.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.23046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1478.234375, + "completions/mean_terminated_length": 1268.00537109375, + "completions/min_length": 329.0, + "completions/min_terminated_length": 329.0, + "entropy": 0.27151405811309814, + "epoch": 0.11314887000234307, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.30221056938171387, + "learning_rate": 1e-06, + "loss": 0.034, + "num_tokens": 287789690.0, + "reward": 0.3671875, + "reward_std": 0.263522744178772, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 664, + "tools/generated_tokens": 5406.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1293.9765625, + "completions/mean_terminated_length": 1128.8095703125, + "completions/min_length": 227.0, + "completions/min_terminated_length": 227.0, + "entropy": 0.27544736210256815, + "epoch": 0.1133192749270454, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.6790370941162109, + "learning_rate": 1e-06, + "loss": 0.0258, + "num_tokens": 288210468.0, + "reward": 0.46484375, + "reward_std": 0.29674431681632996, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 665, + "tools/generated_tokens": 4933.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1331.7265625, + "completions/mean_terminated_length": 1097.9171142578125, + "completions/min_length": 231.0, + "completions/min_terminated_length": 231.0, + "entropy": 0.23813448939472437, + "epoch": 0.11348967985174771, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15418995916843414, + "learning_rate": 1e-06, + "loss": 0.0461, + "num_tokens": 288640190.0, + "reward": 0.6328125, + "reward_std": 0.2523040473461151, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 666, + "tools/generated_tokens": 4843.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1196.55859375, + "completions/mean_terminated_length": 1087.7840576171875, + "completions/min_length": 51.0, + "completions/min_terminated_length": 51.0, + "entropy": 0.3058233577758074, + "epoch": 0.11366008477645004, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15078438818454742, + "learning_rate": 1e-06, + "loss": 0.0207, + "num_tokens": 289018029.0, + "reward": 0.5390625, + "reward_std": 0.19994549453258514, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 667, + "tools/generated_tokens": 3532.56640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1408.40234375, + "completions/mean_terminated_length": 1221.050537109375, + "completions/min_length": 374.0, + "completions/min_terminated_length": 374.0, + "entropy": 0.2560304347425699, + "epoch": 0.11383048970115237, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.1727852076292038, + "learning_rate": 1e-06, + "loss": 0.0189, + "num_tokens": 289479988.0, + "reward": 0.3984375, + "reward_std": 0.30986616015434265, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 668, + "tools/generated_tokens": 5016.4140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1272.8359375, + "completions/mean_terminated_length": 1098.5167236328125, + "completions/min_length": 135.0, + "completions/min_terminated_length": 135.0, + "entropy": 0.24627330992370844, + "epoch": 0.11400089462585469, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14643415808677673, + "learning_rate": 1e-06, + "loss": 0.0214, + "num_tokens": 289885786.0, + "reward": 0.5625, + "reward_std": 0.2265685796737671, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 669, + "tools/generated_tokens": 4368.84765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1236.69140625, + "completions/mean_terminated_length": 1137.0745849609375, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.27028775587677956, + "epoch": 0.114171299550557, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14379100501537323, + "learning_rate": 1e-06, + "loss": 0.0023, + "num_tokens": 290281147.0, + "reward": 0.7109375, + "reward_std": 0.21433347463607788, + "rewards/simpleverify_reward/mean": 0.7109375, + "rewards/simpleverify_reward/std": 0.45421501994132996, + "step": 670, + "tools/generated_tokens": 4028.703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1269.59375, + "completions/mean_terminated_length": 1108.0377197265625, + "completions/min_length": 293.0, + "completions/min_terminated_length": 293.0, + "entropy": 0.25096935499459505, + "epoch": 0.11434170447525933, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15770353376865387, + "learning_rate": 1e-06, + "loss": 0.032, + "num_tokens": 290695283.0, + "reward": 0.55078125, + "reward_std": 0.22028236091136932, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 671, + "tools/generated_tokens": 4477.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1248.41796875, + "completions/mean_terminated_length": 1087.0, + "completions/min_length": 120.0, + "completions/min_terminated_length": 120.0, + "entropy": 0.2772094663232565, + "epoch": 0.11451210939996166, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1849849820137024, + "learning_rate": 1e-06, + "loss": 0.0025, + "num_tokens": 291104094.0, + "reward": 0.40625, + "reward_std": 0.26416581869125366, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 672, + "tools/generated_tokens": 4712.40234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1195.19921875, + "completions/mean_terminated_length": 1094.650634765625, + "completions/min_length": 150.0, + "completions/min_terminated_length": 150.0, + "entropy": 0.2656153868883848, + "epoch": 0.11468251432466399, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18833574652671814, + "learning_rate": 1e-06, + "loss": -0.0117, + "num_tokens": 291495249.0, + "reward": 0.52734375, + "reward_std": 0.300828218460083, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 673, + "tools/generated_tokens": 4363.21484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1407.12109375, + "completions/mean_terminated_length": 1175.3138427734375, + "completions/min_length": 87.0, + "completions/min_terminated_length": 87.0, + "entropy": 0.2654810417443514, + "epoch": 0.1148529192493663, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.18689681589603424, + "learning_rate": 1e-06, + "loss": 0.0369, + "num_tokens": 291934592.0, + "reward": 0.453125, + "reward_std": 0.3470836579799652, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 674, + "tools/generated_tokens": 4823.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1353.97265625, + "completions/mean_terminated_length": 1159.64501953125, + "completions/min_length": 163.0, + "completions/min_terminated_length": 163.0, + "entropy": 0.2787818741053343, + "epoch": 0.11502332417406863, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.15539275109767914, + "learning_rate": 1e-06, + "loss": 0.0198, + "num_tokens": 292371289.0, + "reward": 0.53515625, + "reward_std": 0.23942145705223083, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 675, + "tools/generated_tokens": 5033.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1319.33984375, + "completions/mean_terminated_length": 1151.1971435546875, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.2657048776745796, + "epoch": 0.11519372909877096, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18021747469902039, + "learning_rate": 1e-06, + "loss": 0.0122, + "num_tokens": 292791952.0, + "reward": 0.40234375, + "reward_std": 0.260199636220932, + "rewards/simpleverify_reward/mean": 0.40234375, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 676, + "tools/generated_tokens": 4383.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1340.1484375, + "completions/mean_terminated_length": 1176.8173828125, + "completions/min_length": 222.0, + "completions/min_terminated_length": 222.0, + "entropy": 0.21105117443948984, + "epoch": 0.11536413402347327, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16563092172145844, + "learning_rate": 1e-06, + "loss": 0.0177, + "num_tokens": 293213606.0, + "reward": 0.62109375, + "reward_std": 0.30369094014167786, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 677, + "tools/generated_tokens": 4212.1640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1331.52734375, + "completions/mean_terminated_length": 1148.9019775390625, + "completions/min_length": 41.0, + "completions/min_terminated_length": 41.0, + "entropy": 0.2592040905728936, + "epoch": 0.1155345389481756, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17642486095428467, + "learning_rate": 1e-06, + "loss": 0.0382, + "num_tokens": 293634125.0, + "reward": 0.59375, + "reward_std": 0.23083871603012085, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 678, + "tools/generated_tokens": 4603.5390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1387.69921875, + "completions/mean_terminated_length": 1194.302978515625, + "completions/min_length": 90.0, + "completions/min_terminated_length": 90.0, + "entropy": 0.23489708360284567, + "epoch": 0.11570494387287793, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.13390317559242249, + "learning_rate": 1e-06, + "loss": 0.0317, + "num_tokens": 294065840.0, + "reward": 0.4609375, + "reward_std": 0.16470219194889069, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 679, + "tools/generated_tokens": 4459.7421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1468.8046875, + "completions/mean_terminated_length": 1195.8563232421875, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.2739240461960435, + "epoch": 0.11587534879758025, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17326202988624573, + "learning_rate": 1e-06, + "loss": 0.0209, + "num_tokens": 294518750.0, + "reward": 0.45703125, + "reward_std": 0.2021270990371704, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 680, + "tools/generated_tokens": 5108.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1340.69140625, + "completions/mean_terminated_length": 1169.0145263671875, + "completions/min_length": 355.0, + "completions/min_terminated_length": 355.0, + "entropy": 0.27234411612153053, + "epoch": 0.11604575372228257, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18925684690475464, + "learning_rate": 1e-06, + "loss": 0.0357, + "num_tokens": 294951855.0, + "reward": 0.46484375, + "reward_std": 0.19278642535209656, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 681, + "tools/generated_tokens": 4924.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1425.78515625, + "completions/mean_terminated_length": 1333.717529296875, + "completions/min_length": 275.0, + "completions/min_terminated_length": 275.0, + "entropy": 0.21625436283648014, + "epoch": 0.1162161586469849, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1252322494983673, + "learning_rate": 1e-06, + "loss": 0.0216, + "num_tokens": 295381896.0, + "reward": 0.75390625, + "reward_std": 0.22438223659992218, + "rewards/simpleverify_reward/mean": 0.75390625, + "rewards/simpleverify_reward/std": 0.43157756328582764, + "step": 682, + "tools/generated_tokens": 3857.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1298.3671875, + "completions/mean_terminated_length": 1142.7877197265625, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.22653070464730263, + "epoch": 0.11638656357168722, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.15767717361450195, + "learning_rate": 1e-06, + "loss": 0.0129, + "num_tokens": 295794966.0, + "reward": 0.56640625, + "reward_std": 0.27572914958000183, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 683, + "tools/generated_tokens": 4322.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1334.33203125, + "completions/mean_terminated_length": 1156.804931640625, + "completions/min_length": 118.0, + "completions/min_terminated_length": 118.0, + "entropy": 0.24402422830462456, + "epoch": 0.11655696849638955, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15608720481395721, + "learning_rate": 1e-06, + "loss": 0.0107, + "num_tokens": 296211579.0, + "reward": 0.48828125, + "reward_std": 0.2114706039428711, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 684, + "tools/generated_tokens": 4678.3515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1433.1484375, + "completions/mean_terminated_length": 1122.12353515625, + "completions/min_length": 186.0, + "completions/min_terminated_length": 186.0, + "entropy": 0.2377507919445634, + "epoch": 0.11672737342109187, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.21769921481609344, + "learning_rate": 1e-06, + "loss": 0.0011, + "num_tokens": 296663313.0, + "reward": 0.45703125, + "reward_std": 0.2530073821544647, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 685, + "tools/generated_tokens": 5337.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.90625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1391.33984375, + "completions/mean_terminated_length": 1163.2369384765625, + "completions/min_length": 167.0, + "completions/min_terminated_length": 167.0, + "entropy": 0.2597609106451273, + "epoch": 0.11689777834579419, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16523295640945435, + "learning_rate": 1e-06, + "loss": 0.0139, + "num_tokens": 297101864.0, + "reward": 0.51953125, + "reward_std": 0.22704584896564484, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 686, + "tools/generated_tokens": 4775.33984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1502.62109375, + "completions/mean_terminated_length": 1280.8846435546875, + "completions/min_length": 162.0, + "completions/min_terminated_length": 162.0, + "entropy": 0.24336642771959305, + "epoch": 0.11706818327049652, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14157827198505402, + "learning_rate": 1e-06, + "loss": 0.0157, + "num_tokens": 297562263.0, + "reward": 0.49609375, + "reward_std": 0.19090843200683594, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 687, + "tools/generated_tokens": 4734.6328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1234.9375, + "completions/mean_terminated_length": 1070.8028564453125, + "completions/min_length": 9.0, + "completions/min_terminated_length": 9.0, + "entropy": 0.25796001125127077, + "epoch": 0.11723858819519885, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18083597719669342, + "learning_rate": 1e-06, + "loss": 0.0215, + "num_tokens": 297953623.0, + "reward": 0.41015625, + "reward_std": 0.2173290103673935, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 688, + "tools/generated_tokens": 4298.9453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1457.578125, + "completions/mean_terminated_length": 1194.073486328125, + "completions/min_length": 255.0, + "completions/min_terminated_length": 255.0, + "entropy": 0.2565632164478302, + "epoch": 0.11740899311990116, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.1800205558538437, + "learning_rate": 1e-06, + "loss": 0.0088, + "num_tokens": 298415019.0, + "reward": 0.578125, + "reward_std": 0.29830074310302734, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 689, + "tools/generated_tokens": 5321.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.88671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1353.52734375, + "completions/mean_terminated_length": 1140.938720703125, + "completions/min_length": 138.0, + "completions/min_terminated_length": 138.0, + "entropy": 0.25891492888331413, + "epoch": 0.11757939804460349, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15302583575248718, + "learning_rate": 1e-06, + "loss": 0.034, + "num_tokens": 298848098.0, + "reward": 0.42578125, + "reward_std": 0.16415652632713318, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 690, + "tools/generated_tokens": 4897.52734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1498.70703125, + "completions/mean_terminated_length": 1249.0284423828125, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.2533171446993947, + "epoch": 0.11774980296930582, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.13147640228271484, + "learning_rate": 1e-06, + "loss": 0.0304, + "num_tokens": 299305415.0, + "reward": 0.41796875, + "reward_std": 0.24447914958000183, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 691, + "tools/generated_tokens": 4906.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1342.39453125, + "completions/mean_terminated_length": 1162.563720703125, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "entropy": 0.20845069084316492, + "epoch": 0.11792020789400813, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14937786757946014, + "learning_rate": 1e-06, + "loss": -0.0029, + "num_tokens": 299720140.0, + "reward": 0.47265625, + "reward_std": 0.2062118798494339, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 692, + "tools/generated_tokens": 4318.4453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1480.01171875, + "completions/mean_terminated_length": 1257.771728515625, + "completions/min_length": 138.0, + "completions/min_terminated_length": 138.0, + "entropy": 0.20788600947707891, + "epoch": 0.11809061281871046, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1496376246213913, + "learning_rate": 1e-06, + "loss": 0.0196, + "num_tokens": 300176031.0, + "reward": 0.58203125, + "reward_std": 0.2978776693344116, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 693, + "tools/generated_tokens": 4904.03125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1348.3359375, + "completions/mean_terminated_length": 1090.171142578125, + "completions/min_length": 108.0, + "completions/min_terminated_length": 108.0, + "entropy": 0.3026274088770151, + "epoch": 0.11826101774341279, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17613011598587036, + "learning_rate": 1e-06, + "loss": 0.0267, + "num_tokens": 300613333.0, + "reward": 0.34765625, + "reward_std": 0.2727735638618469, + "rewards/simpleverify_reward/mean": 0.34765625, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 694, + "tools/generated_tokens": 5364.34375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1400.03125, + "completions/mean_terminated_length": 1222.7313232421875, + "completions/min_length": 305.0, + "completions/min_terminated_length": 305.0, + "entropy": 0.26990509778261185, + "epoch": 0.11843142266811511, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17711031436920166, + "learning_rate": 1e-06, + "loss": 0.0252, + "num_tokens": 301056621.0, + "reward": 0.53125, + "reward_std": 0.21864622831344604, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 695, + "tools/generated_tokens": 5104.02734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2012.0, + "completions/mean_length": 1370.99609375, + "completions/mean_terminated_length": 1095.75830078125, + "completions/min_length": 177.0, + "completions/min_terminated_length": 177.0, + "entropy": 0.23035681061446667, + "epoch": 0.11860182759281743, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1524369716644287, + "learning_rate": 1e-06, + "loss": 0.0249, + "num_tokens": 301494636.0, + "reward": 0.5, + "reward_std": 0.19760414958000183, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 696, + "tools/generated_tokens": 4899.015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.72265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1338.0625, + "completions/mean_terminated_length": 1148.2772216796875, + "completions/min_length": 181.0, + "completions/min_terminated_length": 181.0, + "entropy": 0.23033000621944666, + "epoch": 0.11877223251751975, + "frac_reward_zero_std": 0.0625, + "grad_norm": 0.22331736981868744, + "learning_rate": 1e-06, + "loss": 0.0292, + "num_tokens": 301923404.0, + "reward": 0.59765625, + "reward_std": 0.41821908950805664, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 697, + "tools/generated_tokens": 4842.07421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1406.28125, + "completions/mean_terminated_length": 1261.97119140625, + "completions/min_length": 253.0, + "completions/min_terminated_length": 253.0, + "entropy": 0.22601748164743185, + "epoch": 0.11894263744222208, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15356329083442688, + "learning_rate": 1e-06, + "loss": 0.0176, + "num_tokens": 302363300.0, + "reward": 0.453125, + "reward_std": 0.22843991219997406, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 698, + "tools/generated_tokens": 4582.28515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1260.859375, + "completions/mean_terminated_length": 1168.0523681640625, + "completions/min_length": 160.0, + "completions/min_terminated_length": 160.0, + "entropy": 0.24092142656445503, + "epoch": 0.11911304236692441, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1545473337173462, + "learning_rate": 1e-06, + "loss": -0.0011, + "num_tokens": 302758256.0, + "reward": 0.61328125, + "reward_std": 0.2468073070049286, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 699, + "tools/generated_tokens": 3692.86328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1433.78515625, + "completions/mean_terminated_length": 1184.054931640625, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.25013092439621687, + "epoch": 0.11928344729162672, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.13353323936462402, + "learning_rate": 1e-06, + "loss": 0.017, + "num_tokens": 303212809.0, + "reward": 0.5078125, + "reward_std": 0.21973668038845062, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 700, + "tools/generated_tokens": 5297.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.88671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1309.0078125, + "completions/mean_terminated_length": 1092.535400390625, + "completions/min_length": 194.0, + "completions/min_terminated_length": 194.0, + "entropy": 0.2741047888994217, + "epoch": 0.11945385221632905, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2745325565338135, + "learning_rate": 1e-06, + "loss": 0.0262, + "num_tokens": 303633243.0, + "reward": 0.23046875, + "reward_std": 0.22974532842636108, + "rewards/simpleverify_reward/mean": 0.23046875, + "rewards/simpleverify_reward/std": 0.4219578504562378, + "step": 701, + "tools/generated_tokens": 5077.01953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.83984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1444.83203125, + "completions/mean_terminated_length": 1139.7353515625, + "completions/min_length": 127.0, + "completions/min_terminated_length": 127.0, + "entropy": 0.2707134699448943, + "epoch": 0.11962425714103138, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.16257143020629883, + "learning_rate": 1e-06, + "loss": 0.0182, + "num_tokens": 304077776.0, + "reward": 0.4375, + "reward_std": 0.21388331055641174, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 702, + "tools/generated_tokens": 4956.859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1345.25, + "completions/mean_terminated_length": 1244.87060546875, + "completions/min_length": 141.0, + "completions/min_terminated_length": 141.0, + "entropy": 0.21007448062300682, + "epoch": 0.1197946620657337, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.13849498331546783, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 304489776.0, + "reward": 0.609375, + "reward_std": 0.2241290807723999, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 703, + "tools/generated_tokens": 3713.26953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.15625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1321.91015625, + "completions/mean_terminated_length": 1179.4111328125, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.21439216658473015, + "epoch": 0.11996506699043602, + "frac_reward_zero_std": 0.8125, + "grad_norm": 0.0818270817399025, + "learning_rate": 1e-06, + "loss": 0.008, + "num_tokens": 304891865.0, + "reward": 0.61328125, + "reward_std": 0.09122256934642792, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 704, + "tools/generated_tokens": 3377.91015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.00390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1415.1953125, + "completions/mean_terminated_length": 1242.0447998046875, + "completions/min_length": 103.0, + "completions/min_terminated_length": 103.0, + "entropy": 0.2485204804688692, + "epoch": 0.12013547191513835, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.14054827392101288, + "learning_rate": 1e-06, + "loss": 0.0321, + "num_tokens": 305346059.0, + "reward": 0.56640625, + "reward_std": 0.25507354736328125, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 705, + "tools/generated_tokens": 4535.20703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1341.36328125, + "completions/mean_terminated_length": 1095.9105224609375, + "completions/min_length": 240.0, + "completions/min_terminated_length": 240.0, + "entropy": 0.25401805620640516, + "epoch": 0.12030587683984068, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15206822752952576, + "learning_rate": 1e-06, + "loss": 0.0322, + "num_tokens": 305778264.0, + "reward": 0.5, + "reward_std": 0.22327595949172974, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 706, + "tools/generated_tokens": 4981.38671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1274.87890625, + "completions/mean_terminated_length": 1096.4759521484375, + "completions/min_length": 162.0, + "completions/min_terminated_length": 162.0, + "entropy": 0.2554048392921686, + "epoch": 0.12047628176454299, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14919979870319366, + "learning_rate": 1e-06, + "loss": 0.0104, + "num_tokens": 306180441.0, + "reward": 0.47265625, + "reward_std": 0.20223368704319, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 707, + "tools/generated_tokens": 4170.8828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1278.359375, + "completions/mean_terminated_length": 1168.4107666015625, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.259488970041275, + "epoch": 0.12064668668924532, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15932126343250275, + "learning_rate": 1e-06, + "loss": 0.0098, + "num_tokens": 306576677.0, + "reward": 0.5859375, + "reward_std": 0.16713693737983704, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 708, + "tools/generated_tokens": 3806.359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1360.66015625, + "completions/mean_terminated_length": 1064.98876953125, + "completions/min_length": 72.0, + "completions/min_terminated_length": 72.0, + "entropy": 0.2471571397036314, + "epoch": 0.12081709161394764, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17095567286014557, + "learning_rate": 1e-06, + "loss": 0.0218, + "num_tokens": 307009278.0, + "reward": 0.484375, + "reward_std": 0.20851992070674896, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 709, + "tools/generated_tokens": 5192.66015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1168.7421875, + "completions/mean_terminated_length": 976.1571655273438, + "completions/min_length": 143.0, + "completions/min_terminated_length": 143.0, + "entropy": 0.24509234726428986, + "epoch": 0.12098749653864997, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12865740060806274, + "learning_rate": 1e-06, + "loss": -0.0094, + "num_tokens": 307387116.0, + "reward": 0.49609375, + "reward_std": 0.13039018213748932, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 710, + "tools/generated_tokens": 4024.7578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1396.953125, + "completions/mean_terminated_length": 1188.8917236328125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "entropy": 0.26750652492046356, + "epoch": 0.12115790146335229, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.19605718553066254, + "learning_rate": 1e-06, + "loss": 0.0122, + "num_tokens": 307823776.0, + "reward": 0.5546875, + "reward_std": 0.3256245255470276, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 711, + "tools/generated_tokens": 4684.95703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1311.0234375, + "completions/mean_terminated_length": 1162.244140625, + "completions/min_length": 46.0, + "completions/min_terminated_length": 46.0, + "entropy": 0.25421780720353127, + "epoch": 0.12132830638805461, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1831713616847992, + "learning_rate": 1e-06, + "loss": 0.0263, + "num_tokens": 308240918.0, + "reward": 0.58984375, + "reward_std": 0.23776951432228088, + "rewards/simpleverify_reward/mean": 0.58984375, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 712, + "tools/generated_tokens": 4343.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1356.37890625, + "completions/mean_terminated_length": 1158.2813720703125, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.25517632253468037, + "epoch": 0.12149871131275694, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14055019617080688, + "learning_rate": 1e-06, + "loss": 0.0246, + "num_tokens": 308660855.0, + "reward": 0.546875, + "reward_std": 0.18968652188777924, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 713, + "tools/generated_tokens": 4332.3828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1404.41796875, + "completions/mean_terminated_length": 1198.7421875, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.24821795243769884, + "epoch": 0.12166911623745927, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1955830156803131, + "learning_rate": 1e-06, + "loss": 0.0069, + "num_tokens": 309103378.0, + "reward": 0.51953125, + "reward_std": 0.260877788066864, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 714, + "tools/generated_tokens": 4692.421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2016.0, + "completions/mean_length": 1353.8984375, + "completions/mean_terminated_length": 1136.769287109375, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.24956323858350515, + "epoch": 0.12183952116216158, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.26573148369789124, + "learning_rate": 1e-06, + "loss": 0.0268, + "num_tokens": 309527944.0, + "reward": 0.4609375, + "reward_std": 0.275407612323761, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 715, + "tools/generated_tokens": 4561.91015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.56640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.36328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1445.375, + "completions/mean_terminated_length": 1101.57666015625, + "completions/min_length": 258.0, + "completions/min_terminated_length": 258.0, + "entropy": 0.27512555941939354, + "epoch": 0.12200992608686391, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.141936793923378, + "learning_rate": 1e-06, + "loss": 0.0286, + "num_tokens": 309988312.0, + "reward": 0.41796875, + "reward_std": 0.23292973637580872, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 716, + "tools/generated_tokens": 5589.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1284.85546875, + "completions/mean_terminated_length": 1143.532470703125, + "completions/min_length": 251.0, + "completions/min_terminated_length": 251.0, + "entropy": 0.24519119411706924, + "epoch": 0.12218033101156624, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1790136843919754, + "learning_rate": 1e-06, + "loss": 0.0138, + "num_tokens": 310399795.0, + "reward": 0.53125, + "reward_std": 0.25110703706741333, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 717, + "tools/generated_tokens": 4508.86328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.33203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2012.0, + "completions/mean_length": 1366.08984375, + "completions/mean_terminated_length": 1027.140380859375, + "completions/min_length": 239.0, + "completions/min_terminated_length": 239.0, + "entropy": 0.27409070543944836, + "epoch": 0.12235073593626856, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15358015894889832, + "learning_rate": 1e-06, + "loss": 0.0296, + "num_tokens": 310837130.0, + "reward": 0.3828125, + "reward_std": 0.189048171043396, + "rewards/simpleverify_reward/mean": 0.3828125, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 718, + "tools/generated_tokens": 5374.10546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.95703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1412.98828125, + "completions/mean_terminated_length": 1129.5762939453125, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.24863294791430235, + "epoch": 0.12252114086097088, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.9129813313484192, + "learning_rate": 1e-06, + "loss": 0.0326, + "num_tokens": 311279415.0, + "reward": 0.296875, + "reward_std": 0.23339098691940308, + "rewards/simpleverify_reward/mean": 0.296875, + "rewards/simpleverify_reward/std": 0.45777595043182373, + "step": 719, + "tools/generated_tokens": 5149.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.82421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.28515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1457.515625, + "completions/mean_terminated_length": 1221.9835205078125, + "completions/min_length": 177.0, + "completions/min_terminated_length": 177.0, + "entropy": 0.2301028361544013, + "epoch": 0.1226915457856732, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.14140602946281433, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 311734571.0, + "reward": 0.578125, + "reward_std": 0.24382495880126953, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 720, + "tools/generated_tokens": 5281.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1244.66796875, + "completions/mean_terminated_length": 1121.6396484375, + "completions/min_length": 219.0, + "completions/min_terminated_length": 219.0, + "entropy": 0.2301520025357604, + "epoch": 0.12286195071037553, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17017489671707153, + "learning_rate": 1e-06, + "loss": 0.0017, + "num_tokens": 312135206.0, + "reward": 0.43359375, + "reward_std": 0.25967881083488464, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 721, + "tools/generated_tokens": 4140.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1364.95703125, + "completions/mean_terminated_length": 1160.390869140625, + "completions/min_length": 213.0, + "completions/min_terminated_length": 213.0, + "entropy": 0.22799314465373755, + "epoch": 0.12303235563507785, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1625974029302597, + "learning_rate": 1e-06, + "loss": 0.0549, + "num_tokens": 312562875.0, + "reward": 0.40625, + "reward_std": 0.2394353747367859, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 722, + "tools/generated_tokens": 4492.9765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.52734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1318.16015625, + "completions/mean_terminated_length": 1145.3961181640625, + "completions/min_length": 240.0, + "completions/min_terminated_length": 240.0, + "entropy": 0.25876136031001806, + "epoch": 0.12320276055978018, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.16170518100261688, + "learning_rate": 1e-06, + "loss": 0.0373, + "num_tokens": 312981524.0, + "reward": 0.578125, + "reward_std": 0.25218653678894043, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 723, + "tools/generated_tokens": 4622.15234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.61328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1262.41796875, + "completions/mean_terminated_length": 1085.7559814453125, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.24038258753716946, + "epoch": 0.1233731654844825, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.189750537276268, + "learning_rate": 1e-06, + "loss": 0.0328, + "num_tokens": 313394239.0, + "reward": 0.53515625, + "reward_std": 0.2747562527656555, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 724, + "tools/generated_tokens": 4326.41015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.49609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1360.71484375, + "completions/mean_terminated_length": 1102.0699462890625, + "completions/min_length": 74.0, + "completions/min_terminated_length": 74.0, + "entropy": 0.24909261241555214, + "epoch": 0.12354357040918483, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18744732439517975, + "learning_rate": 1e-06, + "loss": 0.0272, + "num_tokens": 313821094.0, + "reward": 0.48046875, + "reward_std": 0.2504550814628601, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 725, + "tools/generated_tokens": 4976.7265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.32421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1441.70703125, + "completions/mean_terminated_length": 1150.838134765625, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.22987970151007175, + "epoch": 0.12371397533388714, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.11026185005903244, + "learning_rate": 1e-06, + "loss": 0.0189, + "num_tokens": 314274395.0, + "reward": 0.44140625, + "reward_std": 0.1156454086303711, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 726, + "tools/generated_tokens": 5009.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1479.96875, + "completions/mean_terminated_length": 1202.56396484375, + "completions/min_length": 230.0, + "completions/min_terminated_length": 230.0, + "entropy": 0.25264427438378334, + "epoch": 0.12388438025858947, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.27290549874305725, + "learning_rate": 1e-06, + "loss": 0.0054, + "num_tokens": 314741587.0, + "reward": 0.3359375, + "reward_std": 0.25947707891464233, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 727, + "tools/generated_tokens": 5439.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.93359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1302.54296875, + "completions/mean_terminated_length": 1098.5621337890625, + "completions/min_length": 158.0, + "completions/min_terminated_length": 158.0, + "entropy": 0.2258697571232915, + "epoch": 0.1240547851832918, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17060135304927826, + "learning_rate": 1e-06, + "loss": 0.0277, + "num_tokens": 315158190.0, + "reward": 0.56640625, + "reward_std": 0.2657203674316406, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 728, + "tools/generated_tokens": 4758.5546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1382.52734375, + "completions/mean_terminated_length": 1169.8555908203125, + "completions/min_length": 265.0, + "completions/min_terminated_length": 265.0, + "entropy": 0.2211061930283904, + "epoch": 0.12422519010799413, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15268555283546448, + "learning_rate": 1e-06, + "loss": -0.0258, + "num_tokens": 315585125.0, + "reward": 0.3984375, + "reward_std": 0.2252492606639862, + "rewards/simpleverify_reward/mean": 0.3984375, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 729, + "tools/generated_tokens": 4478.546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1384.4921875, + "completions/mean_terminated_length": 1215.36279296875, + "completions/min_length": 164.0, + "completions/min_terminated_length": 164.0, + "entropy": 0.2283381512388587, + "epoch": 0.12439559503269644, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15997079014778137, + "learning_rate": 1e-06, + "loss": 0.0464, + "num_tokens": 316017027.0, + "reward": 0.5703125, + "reward_std": 0.23340700566768646, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 730, + "tools/generated_tokens": 4664.5078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1342.6796875, + "completions/mean_terminated_length": 1162.9019775390625, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.2390185883268714, + "epoch": 0.12456599995739877, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.17097213864326477, + "learning_rate": 1e-06, + "loss": 0.0529, + "num_tokens": 316447265.0, + "reward": 0.50390625, + "reward_std": 0.26264533400535583, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 731, + "tools/generated_tokens": 4686.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1361.94921875, + "completions/mean_terminated_length": 1207.674560546875, + "completions/min_length": 74.0, + "completions/min_terminated_length": 74.0, + "entropy": 0.2609061785042286, + "epoch": 0.1247364048821011, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1683957874774933, + "learning_rate": 1e-06, + "loss": 0.014, + "num_tokens": 316874436.0, + "reward": 0.6328125, + "reward_std": 0.1835355907678604, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 732, + "tools/generated_tokens": 4409.94921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1302.30859375, + "completions/mean_terminated_length": 1098.278564453125, + "completions/min_length": 142.0, + "completions/min_terminated_length": 142.0, + "entropy": 0.19152087066322565, + "epoch": 0.12490680980680342, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12962663173675537, + "learning_rate": 1e-06, + "loss": -0.0035, + "num_tokens": 317289619.0, + "reward": 0.44140625, + "reward_std": 0.162959486246109, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 733, + "tools/generated_tokens": 4286.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.45703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1373.1171875, + "completions/mean_terminated_length": 1124.11767578125, + "completions/min_length": 336.0, + "completions/min_terminated_length": 336.0, + "entropy": 0.20564308110624552, + "epoch": 0.12507721473150574, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1270165592432022, + "learning_rate": 1e-06, + "loss": 0.045, + "num_tokens": 317724513.0, + "reward": 0.5, + "reward_std": 0.22941282391548157, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 734, + "tools/generated_tokens": 4885.13671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1285.4765625, + "completions/mean_terminated_length": 1067.0753173828125, + "completions/min_length": 46.0, + "completions/min_terminated_length": 46.0, + "entropy": 0.23471161536872387, + "epoch": 0.12524761965620806, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.18795357644557953, + "learning_rate": 1e-06, + "loss": 0.0143, + "num_tokens": 318143227.0, + "reward": 0.4921875, + "reward_std": 0.299323707818985, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 735, + "tools/generated_tokens": 4925.484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1204.640625, + "completions/mean_terminated_length": 1029.603759765625, + "completions/min_length": 10.0, + "completions/min_terminated_length": 10.0, + "entropy": 0.26938064489513636, + "epoch": 0.1254180245809104, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16767403483390808, + "learning_rate": 1e-06, + "loss": -0.0012, + "num_tokens": 318540351.0, + "reward": 0.53125, + "reward_std": 0.2067594677209854, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 736, + "tools/generated_tokens": 4772.63671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1412.6015625, + "completions/mean_terminated_length": 1113.17236328125, + "completions/min_length": 77.0, + "completions/min_terminated_length": 77.0, + "entropy": 0.2551824441179633, + "epoch": 0.12558842950561272, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1621071696281433, + "learning_rate": 1e-06, + "loss": 0.0234, + "num_tokens": 318999049.0, + "reward": 0.546875, + "reward_std": 0.3011544942855835, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 737, + "tools/generated_tokens": 5628.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.05859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1275.375, + "completions/mean_terminated_length": 1119.3990478515625, + "completions/min_length": 170.0, + "completions/min_terminated_length": 170.0, + "entropy": 0.2588097807019949, + "epoch": 0.12575883443031505, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.215603768825531, + "learning_rate": 1e-06, + "loss": 0.0091, + "num_tokens": 319412713.0, + "reward": 0.5625, + "reward_std": 0.32814085483551025, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 738, + "tools/generated_tokens": 4635.375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1366.0078125, + "completions/mean_terminated_length": 1119.345703125, + "completions/min_length": 167.0, + "completions/min_terminated_length": 167.0, + "entropy": 0.22428589407354593, + "epoch": 0.12592923935501735, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.1948491632938385, + "learning_rate": 1e-06, + "loss": 0.0425, + "num_tokens": 319852811.0, + "reward": 0.45703125, + "reward_std": 0.3425579071044922, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 739, + "tools/generated_tokens": 5510.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1233.390625, + "completions/mean_terminated_length": 1133.350830078125, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.25503019988536835, + "epoch": 0.12609964427971967, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.14445248246192932, + "learning_rate": 1e-06, + "loss": 0.0161, + "num_tokens": 320237343.0, + "reward": 0.37890625, + "reward_std": 0.15338994562625885, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 740, + "tools/generated_tokens": 3721.39453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.21484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1451.34765625, + "completions/mean_terminated_length": 1280.46728515625, + "completions/min_length": 261.0, + "completions/min_terminated_length": 261.0, + "entropy": 0.24542142823338509, + "epoch": 0.126270049204422, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.15893565118312836, + "learning_rate": 1e-06, + "loss": 0.0127, + "num_tokens": 320682744.0, + "reward": 0.4765625, + "reward_std": 0.2306618094444275, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 741, + "tools/generated_tokens": 4715.37109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1330.66015625, + "completions/mean_terminated_length": 1120.54541015625, + "completions/min_length": 242.0, + "completions/min_terminated_length": 242.0, + "entropy": 0.26824986282736063, + "epoch": 0.12644045412912433, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.12786538898944855, + "learning_rate": 1e-06, + "loss": 0.0266, + "num_tokens": 321103169.0, + "reward": 0.6015625, + "reward_std": 0.1048629954457283, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 742, + "tools/generated_tokens": 4434.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1232.00390625, + "completions/mean_terminated_length": 1123.685791015625, + "completions/min_length": 192.0, + "completions/min_terminated_length": 192.0, + "entropy": 0.2896163584664464, + "epoch": 0.12661085905382666, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1879495531320572, + "learning_rate": 1e-06, + "loss": 0.0069, + "num_tokens": 321500418.0, + "reward": 0.4453125, + "reward_std": 0.24466386437416077, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 743, + "tools/generated_tokens": 4304.00390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1298.71484375, + "completions/mean_terminated_length": 1093.696533203125, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "entropy": 0.20860141050070524, + "epoch": 0.12678126397852899, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.17947924137115479, + "learning_rate": 1e-06, + "loss": 0.0243, + "num_tokens": 321921049.0, + "reward": 0.5234375, + "reward_std": 0.266292929649353, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 744, + "tools/generated_tokens": 4794.71875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.70703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1431.578125, + "completions/mean_terminated_length": 1061.737548828125, + "completions/min_length": 264.0, + "completions/min_terminated_length": 264.0, + "entropy": 0.24521063640713692, + "epoch": 0.1269516689032313, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2588927447795868, + "learning_rate": 1e-06, + "loss": 0.0305, + "num_tokens": 322378605.0, + "reward": 0.53125, + "reward_std": 0.2354571670293808, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 745, + "tools/generated_tokens": 5839.5859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.15234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1246.34375, + "completions/mean_terminated_length": 1093.474365234375, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.2563879229128361, + "epoch": 0.12712207382793364, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.1865626573562622, + "learning_rate": 1e-06, + "loss": 0.0124, + "num_tokens": 322788261.0, + "reward": 0.5078125, + "reward_std": 0.2784985899925232, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 746, + "tools/generated_tokens": 4414.34765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1448.48828125, + "completions/mean_terminated_length": 1064.1922607421875, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.2458594087511301, + "epoch": 0.12729247875263594, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.29897037148475647, + "learning_rate": 1e-06, + "loss": 0.009, + "num_tokens": 323250962.0, + "reward": 0.30859375, + "reward_std": 0.22523343563079834, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 747, + "tools/generated_tokens": 5672.48828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.0625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1400.5859375, + "completions/mean_terminated_length": 1193.680419921875, + "completions/min_length": 204.0, + "completions/min_terminated_length": 204.0, + "entropy": 0.2490228544920683, + "epoch": 0.12746288367733827, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1579890102148056, + "learning_rate": 1e-06, + "loss": 0.0405, + "num_tokens": 323704552.0, + "reward": 0.4140625, + "reward_std": 0.24012479186058044, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 748, + "tools/generated_tokens": 5272.58984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1360.1171875, + "completions/mean_terminated_length": 1158.6162109375, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.24075281340628862, + "epoch": 0.1276332886020406, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1761752963066101, + "learning_rate": 1e-06, + "loss": 0.0354, + "num_tokens": 324134294.0, + "reward": 0.33984375, + "reward_std": 0.2238122820854187, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 749, + "tools/generated_tokens": 4928.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1267.21875, + "completions/mean_terminated_length": 1105.1697998046875, + "completions/min_length": 140.0, + "completions/min_terminated_length": 140.0, + "entropy": 0.259521072730422, + "epoch": 0.12780369352674292, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20703807473182678, + "learning_rate": 1e-06, + "loss": 0.065, + "num_tokens": 324557886.0, + "reward": 0.6328125, + "reward_std": 0.3279016315937042, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 750, + "tools/generated_tokens": 4819.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2012.0, + "completions/mean_length": 1216.98046875, + "completions/mean_terminated_length": 1072.12841796875, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.22728418465703726, + "epoch": 0.12797409845144525, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17022736370563507, + "learning_rate": 1e-06, + "loss": 0.023, + "num_tokens": 324953193.0, + "reward": 0.65234375, + "reward_std": 0.2394643872976303, + "rewards/simpleverify_reward/mean": 0.65234375, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 751, + "tools/generated_tokens": 4352.9921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1313.6015625, + "completions/mean_terminated_length": 1234.1212158203125, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.21983722131699324, + "epoch": 0.12814450337614758, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15194271504878998, + "learning_rate": 1e-06, + "loss": 0.0064, + "num_tokens": 325362499.0, + "reward": 0.5078125, + "reward_std": 0.15723668038845062, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 752, + "tools/generated_tokens": 3585.59375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1363.44921875, + "completions/mean_terminated_length": 1184.72900390625, + "completions/min_length": 148.0, + "completions/min_terminated_length": 148.0, + "entropy": 0.25913594383746386, + "epoch": 0.1283149083008499, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1837807297706604, + "learning_rate": 1e-06, + "loss": 0.0251, + "num_tokens": 325795686.0, + "reward": 0.33984375, + "reward_std": 0.23348368704319, + "rewards/simpleverify_reward/mean": 0.33984375, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 753, + "tools/generated_tokens": 4643.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1514.44921875, + "completions/mean_terminated_length": 1325.3121337890625, + "completions/min_length": 100.0, + "completions/min_terminated_length": 100.0, + "entropy": 0.24923780746757984, + "epoch": 0.1284853132255522, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1908637136220932, + "learning_rate": 1e-06, + "loss": 0.0119, + "num_tokens": 326262649.0, + "reward": 0.55859375, + "reward_std": 0.21669067442417145, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 754, + "tools/generated_tokens": 4418.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1263.05859375, + "completions/mean_terminated_length": 1104.5963134765625, + "completions/min_length": 151.0, + "completions/min_terminated_length": 151.0, + "entropy": 0.20850294083356857, + "epoch": 0.12865571815025453, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.17914734780788422, + "learning_rate": 1e-06, + "loss": -0.0136, + "num_tokens": 326660904.0, + "reward": 0.5625, + "reward_std": 0.18706360459327698, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 755, + "tools/generated_tokens": 4271.06640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1337.94140625, + "completions/mean_terminated_length": 1139.14501953125, + "completions/min_length": 192.0, + "completions/min_terminated_length": 192.0, + "entropy": 0.22479182668030262, + "epoch": 0.12882612307495686, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.15490786731243134, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 327078537.0, + "reward": 0.55859375, + "reward_std": 0.1475857049226761, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 756, + "tools/generated_tokens": 4457.96484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1411.140625, + "completions/mean_terminated_length": 1100.1220703125, + "completions/min_length": 358.0, + "completions/min_terminated_length": 358.0, + "entropy": 0.22543011792004108, + "epoch": 0.1289965279996592, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1745597869157791, + "learning_rate": 1e-06, + "loss": 0.02, + "num_tokens": 327529085.0, + "reward": 0.3203125, + "reward_std": 0.22623121738433838, + "rewards/simpleverify_reward/mean": 0.3203125, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 757, + "tools/generated_tokens": 5595.14453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.04296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1278.5, + "completions/mean_terminated_length": 1191.512939453125, + "completions/min_length": 132.0, + "completions/min_terminated_length": 132.0, + "entropy": 0.2280335519462824, + "epoch": 0.12916693292436152, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1486106514930725, + "learning_rate": 1e-06, + "loss": 0.0029, + "num_tokens": 327939437.0, + "reward": 0.4765625, + "reward_std": 0.15985959768295288, + "rewards/simpleverify_reward/mean": 0.4765625, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 758, + "tools/generated_tokens": 4174.4921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1222.5625, + "completions/mean_terminated_length": 1060.5606689453125, + "completions/min_length": 241.0, + "completions/min_terminated_length": 241.0, + "entropy": 0.241200378164649, + "epoch": 0.12933733784906384, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.21343690156936646, + "learning_rate": 1e-06, + "loss": 0.027, + "num_tokens": 328341213.0, + "reward": 0.484375, + "reward_std": 0.3007515072822571, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 759, + "tools/generated_tokens": 4782.55859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1250.0703125, + "completions/mean_terminated_length": 1136.0848388671875, + "completions/min_length": 77.0, + "completions/min_terminated_length": 77.0, + "entropy": 0.24906747601926327, + "epoch": 0.12950774277376617, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18144749104976654, + "learning_rate": 1e-06, + "loss": 0.0394, + "num_tokens": 328736703.0, + "reward": 0.5, + "reward_std": 0.1760813295841217, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 760, + "tools/generated_tokens": 4282.08203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1209.4140625, + "completions/mean_terminated_length": 1010.9226684570312, + "completions/min_length": 89.0, + "completions/min_terminated_length": 89.0, + "entropy": 0.21843723859637976, + "epoch": 0.1296781476984685, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.15147215127944946, + "learning_rate": 1e-06, + "loss": 0.025, + "num_tokens": 329132265.0, + "reward": 0.40625, + "reward_std": 0.15364307165145874, + "rewards/simpleverify_reward/mean": 0.40625, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 761, + "tools/generated_tokens": 4529.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.32421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1392.66015625, + "completions/mean_terminated_length": 1078.2542724609375, + "completions/min_length": 139.0, + "completions/min_terminated_length": 139.0, + "entropy": 0.22080306615680456, + "epoch": 0.1298485526231708, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.23827821016311646, + "learning_rate": 1e-06, + "loss": 0.0302, + "num_tokens": 329585138.0, + "reward": 0.41796875, + "reward_std": 0.3341853618621826, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 762, + "tools/generated_tokens": 5624.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.06640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1427.1171875, + "completions/mean_terminated_length": 1207.0263671875, + "completions/min_length": 157.0, + "completions/min_terminated_length": 157.0, + "entropy": 0.23719217535108328, + "epoch": 0.13001895754787313, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20637406408786774, + "learning_rate": 1e-06, + "loss": 0.0076, + "num_tokens": 330033824.0, + "reward": 0.4609375, + "reward_std": 0.24900490045547485, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 763, + "tools/generated_tokens": 5299.12109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1232.45703125, + "completions/mean_terminated_length": 1132.3026123046875, + "completions/min_length": 63.0, + "completions/min_terminated_length": 63.0, + "entropy": 0.22648604400455952, + "epoch": 0.13018936247257545, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18517160415649414, + "learning_rate": 1e-06, + "loss": -0.0135, + "num_tokens": 330424517.0, + "reward": 0.58203125, + "reward_std": 0.246024489402771, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 764, + "tools/generated_tokens": 4024.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.36328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1227.23046875, + "completions/mean_terminated_length": 1126.4342041015625, + "completions/min_length": 175.0, + "completions/min_terminated_length": 175.0, + "entropy": 0.22353226132690907, + "epoch": 0.13035976739727778, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.6939458250999451, + "learning_rate": 1e-06, + "loss": 0.0293, + "num_tokens": 330817328.0, + "reward": 0.55078125, + "reward_std": 0.26575854420661926, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 765, + "tools/generated_tokens": 4155.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1266.28125, + "completions/mean_terminated_length": 1090.488037109375, + "completions/min_length": 203.0, + "completions/min_terminated_length": 203.0, + "entropy": 0.21640700567513704, + "epoch": 0.1305301723219801, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19001305103302002, + "learning_rate": 1e-06, + "loss": 0.0068, + "num_tokens": 331223272.0, + "reward": 0.5078125, + "reward_std": 0.19828036427497864, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 766, + "tools/generated_tokens": 4402.28125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1291.35546875, + "completions/mean_terminated_length": 1079.4949951171875, + "completions/min_length": 181.0, + "completions/min_terminated_length": 181.0, + "entropy": 0.24471392016857862, + "epoch": 0.13070057724668244, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18988561630249023, + "learning_rate": 1e-06, + "loss": 0.0067, + "num_tokens": 331632483.0, + "reward": 0.5625, + "reward_std": 0.2298790067434311, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 767, + "tools/generated_tokens": 4459.359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1321.8984375, + "completions/mean_terminated_length": 1094.759033203125, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.22367277555167675, + "epoch": 0.13087098217138476, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2953495383262634, + "learning_rate": 1e-06, + "loss": -0.0109, + "num_tokens": 332052249.0, + "reward": 0.54296875, + "reward_std": 0.23544135689735413, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 768, + "tools/generated_tokens": 4553.8984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1230.19921875, + "completions/mean_terminated_length": 1125.731201171875, + "completions/min_length": 76.0, + "completions/min_terminated_length": 76.0, + "entropy": 0.205205911770463, + "epoch": 0.13104138709608706, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.14145499467849731, + "learning_rate": 1e-06, + "loss": 0.0108, + "num_tokens": 332430012.0, + "reward": 0.59765625, + "reward_std": 0.15481583774089813, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 769, + "tools/generated_tokens": 3542.2109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.12890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1443.11328125, + "completions/mean_terminated_length": 1210.9676513671875, + "completions/min_length": 200.0, + "completions/min_terminated_length": 200.0, + "entropy": 0.17452639434486628, + "epoch": 0.1312117920207894, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.16943518817424774, + "learning_rate": 1e-06, + "loss": 0.0336, + "num_tokens": 332878681.0, + "reward": 0.5546875, + "reward_std": 0.3109434247016907, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 770, + "tools/generated_tokens": 5091.125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.78125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1353.765625, + "completions/mean_terminated_length": 1092.49462890625, + "completions/min_length": 218.0, + "completions/min_terminated_length": 218.0, + "entropy": 0.25280464068055153, + "epoch": 0.13138219694549172, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.20427419245243073, + "learning_rate": 1e-06, + "loss": 0.0314, + "num_tokens": 333315901.0, + "reward": 0.4921875, + "reward_std": 0.23677174746990204, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 771, + "tools/generated_tokens": 5161.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1242.1015625, + "completions/mean_terminated_length": 1114.4842529296875, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.24298510421067476, + "epoch": 0.13155260187019405, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2038157433271408, + "learning_rate": 1e-06, + "loss": -0.0289, + "num_tokens": 333721079.0, + "reward": 0.6015625, + "reward_std": 0.20255522429943085, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 772, + "tools/generated_tokens": 4458.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1292.96484375, + "completions/mean_terminated_length": 1131.9384765625, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "entropy": 0.2188622960820794, + "epoch": 0.13172300679489637, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19618146121501923, + "learning_rate": 1e-06, + "loss": 0.0387, + "num_tokens": 334127758.0, + "reward": 0.6171875, + "reward_std": 0.24986931681632996, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 773, + "tools/generated_tokens": 4228.97265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1436.421875, + "completions/mean_terminated_length": 1163.4576416015625, + "completions/min_length": 140.0, + "completions/min_terminated_length": 140.0, + "entropy": 0.22219976130872965, + "epoch": 0.1318934117195987, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17642802000045776, + "learning_rate": 1e-06, + "loss": 0.0211, + "num_tokens": 334582298.0, + "reward": 0.4375, + "reward_std": 0.2617560625076294, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 774, + "tools/generated_tokens": 5420.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1401.9140625, + "completions/mean_terminated_length": 1260.3905029296875, + "completions/min_length": 213.0, + "completions/min_terminated_length": 213.0, + "entropy": 0.22596578113734722, + "epoch": 0.13206381664430103, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15917572379112244, + "learning_rate": 1e-06, + "loss": 0.0187, + "num_tokens": 335025108.0, + "reward": 0.50390625, + "reward_std": 0.2054290622472763, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 775, + "tools/generated_tokens": 4289.91796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1409.12109375, + "completions/mean_terminated_length": 1097.1104736328125, + "completions/min_length": 108.0, + "completions/min_terminated_length": 108.0, + "entropy": 0.19567883107811213, + "epoch": 0.13223422156900336, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.24766862392425537, + "learning_rate": 1e-06, + "loss": 0.0172, + "num_tokens": 335470179.0, + "reward": 0.52734375, + "reward_std": 0.27840593457221985, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 776, + "tools/generated_tokens": 5441.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.96875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1274.79296875, + "completions/mean_terminated_length": 1160.372314453125, + "completions/min_length": 197.0, + "completions/min_terminated_length": 197.0, + "entropy": 0.2464032955467701, + "epoch": 0.13240462649370566, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18497081100940704, + "learning_rate": 1e-06, + "loss": 0.0083, + "num_tokens": 335877982.0, + "reward": 0.44140625, + "reward_std": 0.24425500631332397, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 777, + "tools/generated_tokens": 4826.796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1283.06640625, + "completions/mean_terminated_length": 1192.8778076171875, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.195402885787189, + "epoch": 0.13257503141840798, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.20464791357517242, + "learning_rate": 1e-06, + "loss": 0.0229, + "num_tokens": 336277679.0, + "reward": 0.671875, + "reward_std": 0.20443323254585266, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 778, + "tools/generated_tokens": 3699.0625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1379.1796875, + "completions/mean_terminated_length": 1255.3240966796875, + "completions/min_length": 125.0, + "completions/min_terminated_length": 125.0, + "entropy": 0.2305822717025876, + "epoch": 0.1327454363431103, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19441990554332733, + "learning_rate": 1e-06, + "loss": 0.0189, + "num_tokens": 336713437.0, + "reward": 0.48828125, + "reward_std": 0.2589833438396454, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 779, + "tools/generated_tokens": 4659.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1306.515625, + "completions/mean_terminated_length": 1192.95947265625, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.22024625819176435, + "epoch": 0.13291584126781264, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.23439627885818481, + "learning_rate": 1e-06, + "loss": 0.0353, + "num_tokens": 337128497.0, + "reward": 0.5234375, + "reward_std": 0.32919546961784363, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 780, + "tools/generated_tokens": 4418.53125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1106.4296875, + "completions/mean_terminated_length": 1083.83203125, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.22653399221599102, + "epoch": 0.13308624619251497, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2142978012561798, + "learning_rate": 1e-06, + "loss": -0.0058, + "num_tokens": 337494095.0, + "reward": 0.61328125, + "reward_std": 0.24492931365966797, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 781, + "tools/generated_tokens": 3626.43359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.23046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1251.58984375, + "completions/mean_terminated_length": 1125.4705810546875, + "completions/min_length": 165.0, + "completions/min_terminated_length": 165.0, + "entropy": 0.2552179265767336, + "epoch": 0.1332566511172173, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.21872830390930176, + "learning_rate": 1e-06, + "loss": 0.0377, + "num_tokens": 337898486.0, + "reward": 0.35546875, + "reward_std": 0.2791505455970764, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 782, + "tools/generated_tokens": 4891.60546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.77734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1298.1484375, + "completions/mean_terminated_length": 1102.38427734375, + "completions/min_length": 164.0, + "completions/min_terminated_length": 164.0, + "entropy": 0.24485708214342594, + "epoch": 0.13342705604191962, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.20661672949790955, + "learning_rate": 1e-06, + "loss": 0.009, + "num_tokens": 338315660.0, + "reward": 0.4140625, + "reward_std": 0.19423659145832062, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 783, + "tools/generated_tokens": 5146.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.87890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1256.671875, + "completions/mean_terminated_length": 1131.3485107421875, + "completions/min_length": 35.0, + "completions/min_terminated_length": 35.0, + "entropy": 0.21234427765011787, + "epoch": 0.13359746096662192, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19604800641536713, + "learning_rate": 1e-06, + "loss": 0.027, + "num_tokens": 338710408.0, + "reward": 0.4375, + "reward_std": 0.19611266255378723, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 784, + "tools/generated_tokens": 4104.671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1299.63671875, + "completions/mean_terminated_length": 1140.033203125, + "completions/min_length": 259.0, + "completions/min_terminated_length": 259.0, + "entropy": 0.2084363466128707, + "epoch": 0.13376786589132425, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1832629293203354, + "learning_rate": 1e-06, + "loss": 0.0361, + "num_tokens": 339131691.0, + "reward": 0.5078125, + "reward_std": 0.2688092887401581, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 785, + "tools/generated_tokens": 4411.64453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1382.03515625, + "completions/mean_terminated_length": 1240.0047607421875, + "completions/min_length": 119.0, + "completions/min_terminated_length": 119.0, + "entropy": 0.2076737228780985, + "epoch": 0.13393827081602658, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1596766859292984, + "learning_rate": 1e-06, + "loss": 0.0276, + "num_tokens": 339550420.0, + "reward": 0.43359375, + "reward_std": 0.19391977787017822, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 786, + "tools/generated_tokens": 4110.046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.33203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1295.9375, + "completions/mean_terminated_length": 1060.677001953125, + "completions/min_length": 60.0, + "completions/min_terminated_length": 60.0, + "entropy": 0.20822795946151018, + "epoch": 0.1341086757407289, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.24762286245822906, + "learning_rate": 1e-06, + "loss": 0.0399, + "num_tokens": 339984612.0, + "reward": 0.43359375, + "reward_std": 0.31380629539489746, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 787, + "tools/generated_tokens": 5247.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.9296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2006.0, + "completions/mean_length": 1154.12109375, + "completions/mean_terminated_length": 1102.4090576171875, + "completions/min_length": 274.0, + "completions/min_terminated_length": 274.0, + "entropy": 0.202706690877676, + "epoch": 0.13427908066543123, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2033839225769043, + "learning_rate": 1e-06, + "loss": 0.0061, + "num_tokens": 340354739.0, + "reward": 0.73828125, + "reward_std": 0.23622608184814453, + "rewards/simpleverify_reward/mean": 0.73828125, + "rewards/simpleverify_reward/std": 0.4404313564300537, + "step": 788, + "tools/generated_tokens": 3362.12109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1303.8359375, + "completions/mean_terminated_length": 1095.4749755859375, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.23132546804845333, + "epoch": 0.13444948559013356, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.22079981863498688, + "learning_rate": 1e-06, + "loss": 0.0537, + "num_tokens": 340768329.0, + "reward": 0.36328125, + "reward_std": 0.22910727560520172, + "rewards/simpleverify_reward/mean": 0.36328125, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 789, + "tools/generated_tokens": 4647.86328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1259.875, + "completions/mean_terminated_length": 1068.58251953125, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.24689103197306395, + "epoch": 0.1346198905148359, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.17426487803459167, + "learning_rate": 1e-06, + "loss": 0.0199, + "num_tokens": 341176297.0, + "reward": 0.48046875, + "reward_std": 0.23536449670791626, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 790, + "tools/generated_tokens": 5019.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1253.33203125, + "completions/mean_terminated_length": 1159.6375732421875, + "completions/min_length": 231.0, + "completions/min_terminated_length": 231.0, + "entropy": 0.20872488245368004, + "epoch": 0.13479029543953822, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.1594357043504715, + "learning_rate": 1e-06, + "loss": 0.0207, + "num_tokens": 341570974.0, + "reward": 0.48046875, + "reward_std": 0.16124196350574493, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 791, + "tools/generated_tokens": 4125.34375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1183.4140625, + "completions/mean_terminated_length": 1110.14404296875, + "completions/min_length": 186.0, + "completions/min_terminated_length": 186.0, + "entropy": 0.23165373411029577, + "epoch": 0.13496070036424052, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2081676423549652, + "learning_rate": 1e-06, + "loss": 0.0151, + "num_tokens": 341949496.0, + "reward": 0.56640625, + "reward_std": 0.22399571537971497, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 792, + "tools/generated_tokens": 3655.4140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1272.1640625, + "completions/mean_terminated_length": 1227.2808837890625, + "completions/min_length": 278.0, + "completions/min_terminated_length": 278.0, + "entropy": 0.18793443776667118, + "epoch": 0.13513110528894284, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.20044372975826263, + "learning_rate": 1e-06, + "loss": 0.0461, + "num_tokens": 342343842.0, + "reward": 0.71875, + "reward_std": 0.32158076763153076, + "rewards/simpleverify_reward/mean": 0.71875, + "rewards/simpleverify_reward/std": 0.45048993825912476, + "step": 793, + "tools/generated_tokens": 3424.1640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.05078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1423.77734375, + "completions/mean_terminated_length": 1290.6492919921875, + "completions/min_length": 51.0, + "completions/min_terminated_length": 51.0, + "entropy": 0.23818683344870806, + "epoch": 0.13530151021364517, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19164706766605377, + "learning_rate": 1e-06, + "loss": -0.0089, + "num_tokens": 342788905.0, + "reward": 0.5625, + "reward_std": 0.2836625576019287, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 794, + "tools/generated_tokens": 5151.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1287.109375, + "completions/mean_terminated_length": 1232.9874267578125, + "completions/min_length": 198.0, + "completions/min_terminated_length": 198.0, + "entropy": 0.21182250510901213, + "epoch": 0.1354719151383475, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.12743385136127472, + "learning_rate": 1e-06, + "loss": 0.0139, + "num_tokens": 343183925.0, + "reward": 0.41015625, + "reward_std": 0.12436182796955109, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 795, + "tools/generated_tokens": 3479.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.0703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1166.4296875, + "completions/mean_terminated_length": 1103.723876953125, + "completions/min_length": 100.0, + "completions/min_terminated_length": 100.0, + "entropy": 0.25628670770674944, + "epoch": 0.13564232006304983, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2275211066007614, + "learning_rate": 1e-06, + "loss": -0.0129, + "num_tokens": 343561315.0, + "reward": 0.484375, + "reward_std": 0.27587568759918213, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 796, + "tools/generated_tokens": 3942.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.35546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1383.3046875, + "completions/mean_terminated_length": 1291.7244873046875, + "completions/min_length": 60.0, + "completions/min_terminated_length": 60.0, + "entropy": 0.22418879810720682, + "epoch": 0.13581272498775215, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21201610565185547, + "learning_rate": 1e-06, + "loss": 0.0071, + "num_tokens": 343987489.0, + "reward": 0.51953125, + "reward_std": 0.2889299988746643, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 797, + "tools/generated_tokens": 4071.3203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1368.984375, + "completions/mean_terminated_length": 1250.62841796875, + "completions/min_length": 223.0, + "completions/min_terminated_length": 223.0, + "entropy": 0.22045026160776615, + "epoch": 0.13598312991245448, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.1547163426876068, + "learning_rate": 1e-06, + "loss": 0.0029, + "num_tokens": 344407901.0, + "reward": 0.58203125, + "reward_std": 0.13016413152217865, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 798, + "tools/generated_tokens": 3993.00390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1183.9921875, + "completions/mean_terminated_length": 1042.6136474609375, + "completions/min_length": 113.0, + "completions/min_terminated_length": 113.0, + "entropy": 0.2289585219696164, + "epoch": 0.13615353483715678, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.29640427231788635, + "learning_rate": 1e-06, + "loss": 0.0455, + "num_tokens": 344790139.0, + "reward": 0.6015625, + "reward_std": 0.24570295214653015, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 799, + "tools/generated_tokens": 4288.0, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1174.734375, + "completions/mean_terminated_length": 1058.814208984375, + "completions/min_length": 254.0, + "completions/min_terminated_length": 254.0, + "entropy": 0.2375791324302554, + "epoch": 0.1363239397618591, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1987527310848236, + "learning_rate": 1e-06, + "loss": 0.0045, + "num_tokens": 345168391.0, + "reward": 0.421875, + "reward_std": 0.16872048377990723, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 800, + "tools/generated_tokens": 4086.75390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1137.36328125, + "completions/mean_terminated_length": 1025.53076171875, + "completions/min_length": 132.0, + "completions/min_terminated_length": 132.0, + "entropy": 0.2413704339414835, + "epoch": 0.13649434468656144, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.17108941078186035, + "learning_rate": 1e-06, + "loss": 0.0137, + "num_tokens": 345532452.0, + "reward": 0.42578125, + "reward_std": 0.15635645389556885, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 801, + "tools/generated_tokens": 3617.359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.01953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1142.30078125, + "completions/mean_terminated_length": 1124.259033203125, + "completions/min_length": 199.0, + "completions/min_terminated_length": 199.0, + "entropy": 0.26826963387429714, + "epoch": 0.13666474961126376, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.23119020462036133, + "learning_rate": 1e-06, + "loss": 0.0163, + "num_tokens": 345902289.0, + "reward": 0.4609375, + "reward_std": 0.23441588878631592, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 802, + "tools/generated_tokens": 3694.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1271.609375, + "completions/mean_terminated_length": 1106.033203125, + "completions/min_length": 20.0, + "completions/min_terminated_length": 20.0, + "entropy": 0.19994298368692398, + "epoch": 0.1368351545359661, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.16138856112957, + "learning_rate": 1e-06, + "loss": 0.0144, + "num_tokens": 346303165.0, + "reward": 0.7734375, + "reward_std": 0.1468139886856079, + "rewards/simpleverify_reward/mean": 0.7734375, + "rewards/simpleverify_reward/std": 0.41942715644836426, + "step": 803, + "tools/generated_tokens": 4143.63671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1276.671875, + "completions/mean_terminated_length": 1221.8116455078125, + "completions/min_length": 146.0, + "completions/min_terminated_length": 146.0, + "entropy": 0.2121621072292328, + "epoch": 0.13700555946066842, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.16794490814208984, + "learning_rate": 1e-06, + "loss": 0.02, + "num_tokens": 346703497.0, + "reward": 0.6328125, + "reward_std": 0.14106407761573792, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 804, + "tools/generated_tokens": 3628.66796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1265.01953125, + "completions/mean_terminated_length": 1176.512939453125, + "completions/min_length": 275.0, + "completions/min_terminated_length": 275.0, + "entropy": 0.22833317331969738, + "epoch": 0.13717596438537075, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2039792388677597, + "learning_rate": 1e-06, + "loss": 0.0279, + "num_tokens": 347108782.0, + "reward": 0.5546875, + "reward_std": 0.2755298614501953, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 805, + "tools/generated_tokens": 4217.01953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1321.328125, + "completions/mean_terminated_length": 1202.4180908203125, + "completions/min_length": 51.0, + "completions/min_terminated_length": 51.0, + "entropy": 0.22586555872112513, + "epoch": 0.13734636931007307, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.20045915246009827, + "learning_rate": 1e-06, + "loss": 0.0296, + "num_tokens": 347527506.0, + "reward": 0.55859375, + "reward_std": 0.25559213757514954, + "rewards/simpleverify_reward/mean": 0.55859375, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 806, + "tools/generated_tokens": 4657.33984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1219.82421875, + "completions/mean_terminated_length": 1038.414306640625, + "completions/min_length": 106.0, + "completions/min_terminated_length": 106.0, + "entropy": 0.218058155849576, + "epoch": 0.13751677423477537, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15489843487739563, + "learning_rate": 1e-06, + "loss": -0.0017, + "num_tokens": 347917845.0, + "reward": 0.6015625, + "reward_std": 0.16923905909061432, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 807, + "tools/generated_tokens": 3987.82421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1283.1015625, + "completions/mean_terminated_length": 1149.77978515625, + "completions/min_length": 17.0, + "completions/min_terminated_length": 17.0, + "entropy": 0.2296614833176136, + "epoch": 0.1376871791594777, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15924133360385895, + "learning_rate": 1e-06, + "loss": 0.0161, + "num_tokens": 348332815.0, + "reward": 0.38671875, + "reward_std": 0.20598775148391724, + "rewards/simpleverify_reward/mean": 0.38671875, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 808, + "tools/generated_tokens": 4475.1171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1181.6015625, + "completions/mean_terminated_length": 1021.1574096679688, + "completions/min_length": 72.0, + "completions/min_terminated_length": 72.0, + "entropy": 0.2067298498004675, + "epoch": 0.13785758408418003, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20320157706737518, + "learning_rate": 1e-06, + "loss": 0.0222, + "num_tokens": 348744345.0, + "reward": 0.54296875, + "reward_std": 0.24361473321914673, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 809, + "tools/generated_tokens": 4157.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1325.54296875, + "completions/mean_terminated_length": 1274.15478515625, + "completions/min_length": 178.0, + "completions/min_terminated_length": 178.0, + "entropy": 0.21559187397360802, + "epoch": 0.13802798900888236, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18362054228782654, + "learning_rate": 1e-06, + "loss": 0.0258, + "num_tokens": 349144196.0, + "reward": 0.68359375, + "reward_std": 0.19744305312633514, + "rewards/simpleverify_reward/mean": 0.68359375, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 810, + "tools/generated_tokens": 3229.5546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.9296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.26171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1277.98046875, + "completions/mean_terminated_length": 1005.0211181640625, + "completions/min_length": 130.0, + "completions/min_terminated_length": 130.0, + "entropy": 0.28414052817970514, + "epoch": 0.13819839393358468, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18879370391368866, + "learning_rate": 1e-06, + "loss": -0.0018, + "num_tokens": 349563599.0, + "reward": 0.41015625, + "reward_std": 0.20730705559253693, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 811, + "tools/generated_tokens": 5165.98828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1352.12890625, + "completions/mean_terminated_length": 1170.4581298828125, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.2402975307777524, + "epoch": 0.138368798858287, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.22159968316555023, + "learning_rate": 1e-06, + "loss": 0.0355, + "num_tokens": 349992080.0, + "reward": 0.578125, + "reward_std": 0.2767269015312195, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 812, + "tools/generated_tokens": 4976.14453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1361.875, + "completions/mean_terminated_length": 1103.6666259765625, + "completions/min_length": 164.0, + "completions/min_terminated_length": 164.0, + "entropy": 0.23502462450414896, + "epoch": 0.13853920378298934, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18354931473731995, + "learning_rate": 1e-06, + "loss": 0.0266, + "num_tokens": 350430016.0, + "reward": 0.4140625, + "reward_std": 0.21752606332302094, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 813, + "tools/generated_tokens": 5257.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.90234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1399.40234375, + "completions/mean_terminated_length": 1200.857177734375, + "completions/min_length": 121.0, + "completions/min_terminated_length": 121.0, + "entropy": 0.23256792780011892, + "epoch": 0.13870960870769164, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.176396906375885, + "learning_rate": 1e-06, + "loss": 0.0158, + "num_tokens": 350867047.0, + "reward": 0.44921875, + "reward_std": 0.16877499222755432, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 814, + "tools/generated_tokens": 4775.40234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1314.77734375, + "completions/mean_terminated_length": 1228.3363037109375, + "completions/min_length": 224.0, + "completions/min_terminated_length": 224.0, + "entropy": 0.22808466758579016, + "epoch": 0.13888001363239397, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.26083889603614807, + "learning_rate": 1e-06, + "loss": -0.0034, + "num_tokens": 351280910.0, + "reward": 0.56640625, + "reward_std": 0.1604563295841217, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 815, + "tools/generated_tokens": 3906.7890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1413.96484375, + "completions/mean_terminated_length": 1146.2611083984375, + "completions/min_length": 190.0, + "completions/min_terminated_length": 190.0, + "entropy": 0.24262877833098173, + "epoch": 0.1390504185570963, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.21163709461688995, + "learning_rate": 1e-06, + "loss": 0.0116, + "num_tokens": 351725845.0, + "reward": 0.46875, + "reward_std": 0.18056906759738922, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 816, + "tools/generated_tokens": 5533.97265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 2.01171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1245.390625, + "completions/mean_terminated_length": 1142.8546142578125, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.2075537694618106, + "epoch": 0.13922082348179862, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.1580284833908081, + "learning_rate": 1e-06, + "loss": 0.0059, + "num_tokens": 352124329.0, + "reward": 0.5859375, + "reward_std": 0.10331955552101135, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 817, + "tools/generated_tokens": 3693.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1213.72265625, + "completions/mean_terminated_length": 1115.358154296875, + "completions/min_length": 198.0, + "completions/min_terminated_length": 198.0, + "entropy": 0.23233703058212996, + "epoch": 0.13939122840650095, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.20453108847141266, + "learning_rate": 1e-06, + "loss": -0.0097, + "num_tokens": 352502994.0, + "reward": 0.49609375, + "reward_std": 0.13699322938919067, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 818, + "tools/generated_tokens": 3893.734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1331.66015625, + "completions/mean_terminated_length": 1153.44873046875, + "completions/min_length": 275.0, + "completions/min_terminated_length": 275.0, + "entropy": 0.29249880835413933, + "epoch": 0.13956163333120328, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.19395296275615692, + "learning_rate": 1e-06, + "loss": 0.01, + "num_tokens": 352937467.0, + "reward": 0.328125, + "reward_std": 0.19499439001083374, + "rewards/simpleverify_reward/mean": 0.328125, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 819, + "tools/generated_tokens": 5219.6640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1098.1875, + "completions/mean_terminated_length": 1013.3106079101562, + "completions/min_length": 229.0, + "completions/min_terminated_length": 229.0, + "entropy": 0.23998859897255898, + "epoch": 0.1397320382559056, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.2685585618019104, + "learning_rate": 1e-06, + "loss": 0.0363, + "num_tokens": 353293723.0, + "reward": 0.66015625, + "reward_std": 0.29257601499557495, + "rewards/simpleverify_reward/mean": 0.66015625, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 820, + "tools/generated_tokens": 3514.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1251.22265625, + "completions/mean_terminated_length": 1120.8408203125, + "completions/min_length": 123.0, + "completions/min_terminated_length": 123.0, + "entropy": 0.2335311807692051, + "epoch": 0.13990244318060793, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1694953292608261, + "learning_rate": 1e-06, + "loss": 0.0288, + "num_tokens": 353703524.0, + "reward": 0.5078125, + "reward_std": 0.18627606332302094, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 821, + "tools/generated_tokens": 4611.22265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1335.7265625, + "completions/mean_terminated_length": 1127.080810546875, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.25337381288409233, + "epoch": 0.14007284810531023, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19121068716049194, + "learning_rate": 1e-06, + "loss": 0.0114, + "num_tokens": 354141662.0, + "reward": 0.50390625, + "reward_std": 0.23945963382720947, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 822, + "tools/generated_tokens": 5263.71875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.91796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1272.76953125, + "completions/mean_terminated_length": 1111.882080078125, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.20426524244248867, + "epoch": 0.14024325303001256, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19338412582874298, + "learning_rate": 1e-06, + "loss": 0.0552, + "num_tokens": 354543667.0, + "reward": 0.6171875, + "reward_std": 0.25207993388175964, + "rewards/simpleverify_reward/mean": 0.6171875, + "rewards/simpleverify_reward/std": 0.48702529072761536, + "step": 823, + "tools/generated_tokens": 4248.78125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1369.9140625, + "completions/mean_terminated_length": 1213.4375, + "completions/min_length": 118.0, + "completions/min_terminated_length": 118.0, + "entropy": 0.20944975595921278, + "epoch": 0.1404136579547149, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.12369755655527115, + "learning_rate": 1e-06, + "loss": 0.0082, + "num_tokens": 354963085.0, + "reward": 0.62109375, + "reward_std": 0.14979633688926697, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 824, + "tools/generated_tokens": 4273.92578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1289.65234375, + "completions/mean_terminated_length": 1188.9910888671875, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.23713061213493347, + "epoch": 0.14058406287941722, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18087173998355865, + "learning_rate": 1e-06, + "loss": 0.0183, + "num_tokens": 355367684.0, + "reward": 0.6015625, + "reward_std": 0.19519630074501038, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 825, + "tools/generated_tokens": 4113.6484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1258.69140625, + "completions/mean_terminated_length": 1216.4649658203125, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.22486429754644632, + "epoch": 0.14075446780411954, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.18617857992649078, + "learning_rate": 1e-06, + "loss": 0.024, + "num_tokens": 355758965.0, + "reward": 0.625, + "reward_std": 0.10331955552101135, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 826, + "tools/generated_tokens": 3498.6875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.09375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1299.796875, + "completions/mean_terminated_length": 1192.9107666015625, + "completions/min_length": 212.0, + "completions/min_terminated_length": 212.0, + "entropy": 0.2561670271679759, + "epoch": 0.14092487272882187, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1966073215007782, + "learning_rate": 1e-06, + "loss": 0.0084, + "num_tokens": 356164721.0, + "reward": 0.6328125, + "reward_std": 0.2532769739627838, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 827, + "tools/generated_tokens": 4155.79296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1298.41015625, + "completions/mean_terminated_length": 1167.7568359375, + "completions/min_length": 166.0, + "completions/min_terminated_length": 166.0, + "entropy": 0.23608809150755405, + "epoch": 0.1410952776535242, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.14709284901618958, + "learning_rate": 1e-06, + "loss": 0.0127, + "num_tokens": 356583898.0, + "reward": 0.42578125, + "reward_std": 0.17308580875396729, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 828, + "tools/generated_tokens": 4490.41796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1208.51171875, + "completions/mean_terminated_length": 1167.2335205078125, + "completions/min_length": 103.0, + "completions/min_terminated_length": 103.0, + "entropy": 0.22018022369593382, + "epoch": 0.1412656825782265, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.16509723663330078, + "learning_rate": 1e-06, + "loss": 0.0157, + "num_tokens": 356970621.0, + "reward": 0.67578125, + "reward_std": 0.13801807165145874, + "rewards/simpleverify_reward/mean": 0.67578125, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 829, + "tools/generated_tokens": 3456.515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.09765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.03515625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1042.32421875, + "completions/mean_terminated_length": 1005.6842651367188, + "completions/min_length": 68.0, + "completions/min_terminated_length": 68.0, + "entropy": 0.28787852451205254, + "epoch": 0.14143608750292883, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.14435406029224396, + "learning_rate": 1e-06, + "loss": 0.0151, + "num_tokens": 357317776.0, + "reward": 0.46484375, + "reward_std": 0.1156454086303711, + "rewards/simpleverify_reward/mean": 0.46484375, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 830, + "tools/generated_tokens": 3426.33984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2018.0, + "completions/mean_length": 1331.03515625, + "completions/mean_terminated_length": 1202.184326171875, + "completions/min_length": 231.0, + "completions/min_terminated_length": 231.0, + "entropy": 0.22433694265782833, + "epoch": 0.14160649242763115, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.10016655921936035, + "learning_rate": 1e-06, + "loss": 0.018, + "num_tokens": 357725001.0, + "reward": 0.59765625, + "reward_std": 0.10244406759738922, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 831, + "tools/generated_tokens": 3851.04296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.23046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1155.109375, + "completions/mean_terminated_length": 964.687255859375, + "completions/min_length": 216.0, + "completions/min_terminated_length": 216.0, + "entropy": 0.2447952087968588, + "epoch": 0.14177689735233348, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1827242076396942, + "learning_rate": 1e-06, + "loss": 0.0317, + "num_tokens": 358103781.0, + "reward": 0.59765625, + "reward_std": 0.20268860459327698, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 832, + "tools/generated_tokens": 4315.109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.54296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1283.90625, + "completions/mean_terminated_length": 1182.4779052734375, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.24892454501241446, + "epoch": 0.1419473022770358, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18117961287498474, + "learning_rate": 1e-06, + "loss": -0.0067, + "num_tokens": 358512909.0, + "reward": 0.59375, + "reward_std": 0.21182379126548767, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 833, + "tools/generated_tokens": 4195.9140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.29296875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1426.8515625, + "completions/mean_terminated_length": 1169.480712890625, + "completions/min_length": 50.0, + "completions/min_terminated_length": 50.0, + "entropy": 0.2177568394690752, + "epoch": 0.14211770720173814, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.18634583055973053, + "learning_rate": 1e-06, + "loss": 0.0096, + "num_tokens": 358957543.0, + "reward": 0.48828125, + "reward_std": 0.26000863313674927, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 834, + "tools/generated_tokens": 4834.87109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1999.0, + "completions/mean_length": 1295.06640625, + "completions/mean_terminated_length": 1022.7340087890625, + "completions/min_length": 75.0, + "completions/min_terminated_length": 75.0, + "entropy": 0.23896176554262638, + "epoch": 0.14228811212644046, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.20286424458026886, + "learning_rate": 1e-06, + "loss": 0.0031, + "num_tokens": 359365480.0, + "reward": 0.55078125, + "reward_std": 0.17263562977313995, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 835, + "tools/generated_tokens": 4711.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1253.16015625, + "completions/mean_terminated_length": 1123.0999755859375, + "completions/min_length": 236.0, + "completions/min_terminated_length": 236.0, + "entropy": 0.2961372844874859, + "epoch": 0.1424585170511428, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.21413151919841766, + "learning_rate": 1e-06, + "loss": 0.0304, + "num_tokens": 359782977.0, + "reward": 0.453125, + "reward_std": 0.2161029726266861, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 836, + "tools/generated_tokens": 4565.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1333.03125, + "completions/mean_terminated_length": 1230.8973388671875, + "completions/min_length": 202.0, + "completions/min_terminated_length": 202.0, + "entropy": 0.245187783613801, + "epoch": 0.1426289219758451, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.13217699527740479, + "learning_rate": 1e-06, + "loss": 0.0154, + "num_tokens": 360192697.0, + "reward": 0.8359375, + "reward_std": 0.12939241528511047, + "rewards/simpleverify_reward/mean": 0.8359375, + "rewards/simpleverify_reward/std": 0.3710577189922333, + "step": 837, + "tools/generated_tokens": 3733.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1322.84765625, + "completions/mean_terminated_length": 1168.1990966796875, + "completions/min_length": 332.0, + "completions/min_terminated_length": 332.0, + "entropy": 0.2578182676807046, + "epoch": 0.14279932690054742, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.184224933385849, + "learning_rate": 1e-06, + "loss": 0.0204, + "num_tokens": 360615954.0, + "reward": 0.55078125, + "reward_std": 0.2342844307422638, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 838, + "tools/generated_tokens": 4594.85546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.59765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1984.0, + "completions/mean_length": 1177.79296875, + "completions/mean_terminated_length": 1026.10546875, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.24170633126050234, + "epoch": 0.14296973182524975, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.156855970621109, + "learning_rate": 1e-06, + "loss": 0.0176, + "num_tokens": 361006941.0, + "reward": 0.671875, + "reward_std": 0.08351518213748932, + "rewards/simpleverify_reward/mean": 0.671875, + "rewards/simpleverify_reward/std": 0.47045037150382996, + "step": 839, + "tools/generated_tokens": 4057.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1183.8515625, + "completions/mean_terminated_length": 1051.5135498046875, + "completions/min_length": 156.0, + "completions/min_terminated_length": 156.0, + "entropy": 0.24553822353482246, + "epoch": 0.14314013674995207, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.16294178366661072, + "learning_rate": 1e-06, + "loss": 0.029, + "num_tokens": 361392727.0, + "reward": 0.5390625, + "reward_std": 0.21831360459327698, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 840, + "tools/generated_tokens": 4223.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1131.57421875, + "completions/mean_terminated_length": 1027.978271484375, + "completions/min_length": 156.0, + "completions/min_terminated_length": 156.0, + "entropy": 0.24971517082303762, + "epoch": 0.1433105416746544, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.18773284554481506, + "learning_rate": 1e-06, + "loss": 0.0174, + "num_tokens": 361757226.0, + "reward": 0.61328125, + "reward_std": 0.1512194126844406, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 841, + "tools/generated_tokens": 3883.57421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1296.328125, + "completions/mean_terminated_length": 1109.3267822265625, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.2859314167872071, + "epoch": 0.14348094659935673, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.20896050333976746, + "learning_rate": 1e-06, + "loss": 0.0174, + "num_tokens": 362177182.0, + "reward": 0.48046875, + "reward_std": 0.20377904176712036, + "rewards/simpleverify_reward/mean": 0.48046875, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 842, + "tools/generated_tokens": 4952.33203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.78515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1990.0, + "completions/mean_length": 1177.43359375, + "completions/mean_terminated_length": 1053.0670166015625, + "completions/min_length": 126.0, + "completions/min_terminated_length": 126.0, + "entropy": 0.2695024525746703, + "epoch": 0.14365135152405906, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.13191479444503784, + "learning_rate": 1e-06, + "loss": 0.0135, + "num_tokens": 362565677.0, + "reward": 0.44140625, + "reward_std": 0.1347845196723938, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 843, + "tools/generated_tokens": 4561.43359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1288.15625, + "completions/mean_terminated_length": 1112.8173828125, + "completions/min_length": 206.0, + "completions/min_terminated_length": 206.0, + "entropy": 0.23288973979651928, + "epoch": 0.14382175644876136, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.24114589393138885, + "learning_rate": 1e-06, + "loss": 0.0271, + "num_tokens": 362966485.0, + "reward": 0.62890625, + "reward_std": 0.35349398851394653, + "rewards/simpleverify_reward/mean": 0.62890625, + "rewards/simpleverify_reward/std": 0.48404383659362793, + "step": 844, + "tools/generated_tokens": 4320.17578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1126.171875, + "completions/mean_terminated_length": 1056.4580078125, + "completions/min_length": 155.0, + "completions/min_terminated_length": 155.0, + "entropy": 0.261497356928885, + "epoch": 0.14399216137346368, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15535661578178406, + "learning_rate": 1e-06, + "loss": 0.0173, + "num_tokens": 363326817.0, + "reward": 0.71484375, + "reward_std": 0.17114415764808655, + "rewards/simpleverify_reward/mean": 0.71484375, + "rewards/simpleverify_reward/std": 0.4523732364177704, + "step": 845, + "tools/generated_tokens": 3550.1875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.18359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1303.8203125, + "completions/mean_terminated_length": 1237.319091796875, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.23093167133629322, + "epoch": 0.144162566298166, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.14915740489959717, + "learning_rate": 1e-06, + "loss": 0.0054, + "num_tokens": 363740259.0, + "reward": 0.6640625, + "reward_std": 0.1938907653093338, + "rewards/simpleverify_reward/mean": 0.6640625, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 846, + "tools/generated_tokens": 4175.82421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1357.70703125, + "completions/mean_terminated_length": 1155.5050048828125, + "completions/min_length": 308.0, + "completions/min_terminated_length": 308.0, + "entropy": 0.2513050399720669, + "epoch": 0.14433297122286834, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17615199089050293, + "learning_rate": 1e-06, + "loss": 0.0428, + "num_tokens": 364165736.0, + "reward": 0.46875, + "reward_std": 0.22765429317951202, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 847, + "tools/generated_tokens": 4925.71484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1227.23046875, + "completions/mean_terminated_length": 1118.283203125, + "completions/min_length": 132.0, + "completions/min_terminated_length": 132.0, + "entropy": 0.2549811312928796, + "epoch": 0.14450337614757067, + "frac_reward_zero_std": 0.125, + "grad_norm": 0.2387991100549698, + "learning_rate": 1e-06, + "loss": 0.043, + "num_tokens": 364566739.0, + "reward": 0.61328125, + "reward_std": 0.2959836721420288, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 848, + "tools/generated_tokens": 4091.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1233.2109375, + "completions/mean_terminated_length": 1091.1834716796875, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.2781702149659395, + "epoch": 0.144673781072273, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.2847515344619751, + "learning_rate": 1e-06, + "loss": 0.0288, + "num_tokens": 364969817.0, + "reward": 0.5625, + "reward_std": 0.36781418323516846, + "rewards/simpleverify_reward/mean": 0.5625, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 849, + "tools/generated_tokens": 4641.21875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1326.09375, + "completions/mean_terminated_length": 1155.207763671875, + "completions/min_length": 47.0, + "completions/min_terminated_length": 47.0, + "entropy": 0.2481433106586337, + "epoch": 0.14484418599697532, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.20920488238334656, + "learning_rate": 1e-06, + "loss": 0.0015, + "num_tokens": 365387313.0, + "reward": 0.51171875, + "reward_std": 0.2757830321788788, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 850, + "tools/generated_tokens": 4702.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1306.08203125, + "completions/mean_terminated_length": 1160.4765625, + "completions/min_length": 207.0, + "completions/min_terminated_length": 207.0, + "entropy": 0.29046835098415613, + "epoch": 0.14501459092167765, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.22018368542194366, + "learning_rate": 1e-06, + "loss": 0.0149, + "num_tokens": 365806582.0, + "reward": 0.54296875, + "reward_std": 0.28210610151290894, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 851, + "tools/generated_tokens": 4842.09375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1168.640625, + "completions/mean_terminated_length": 1077.67236328125, + "completions/min_length": 43.0, + "completions/min_terminated_length": 43.0, + "entropy": 0.2492090780287981, + "epoch": 0.14518499584637995, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2575002908706665, + "learning_rate": 1e-06, + "loss": 0.0154, + "num_tokens": 366191978.0, + "reward": 0.71484375, + "reward_std": 0.25507354736328125, + "rewards/simpleverify_reward/mean": 0.71484375, + "rewards/simpleverify_reward/std": 0.4523732364177704, + "step": 852, + "tools/generated_tokens": 4104.64453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05078125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1159.765625, + "completions/mean_terminated_length": 1112.246826171875, + "completions/min_length": 154.0, + "completions/min_terminated_length": 154.0, + "entropy": 0.26809254195541143, + "epoch": 0.14535540077108228, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.21709080040454865, + "learning_rate": 1e-06, + "loss": 0.0064, + "num_tokens": 366567502.0, + "reward": 0.6953125, + "reward_std": 0.2163851261138916, + "rewards/simpleverify_reward/mean": 0.6953125, + "rewards/simpleverify_reward/std": 0.4611765742301941, + "step": 853, + "tools/generated_tokens": 3975.7734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1090.95703125, + "completions/mean_terminated_length": 987.3809814453125, + "completions/min_length": 126.0, + "completions/min_terminated_length": 126.0, + "entropy": 0.25722133554518223, + "epoch": 0.1455258056957846, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.22183115780353546, + "learning_rate": 1e-06, + "loss": 0.019, + "num_tokens": 366930563.0, + "reward": 0.44921875, + "reward_std": 0.22523343563079834, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 854, + "tools/generated_tokens": 4474.97265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.65234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1141.69140625, + "completions/mean_terminated_length": 1077.2259521484375, + "completions/min_length": 85.0, + "completions/min_terminated_length": 85.0, + "entropy": 0.22780149802565575, + "epoch": 0.14569621062048693, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.23783008754253387, + "learning_rate": 1e-06, + "loss": -0.024, + "num_tokens": 367313236.0, + "reward": 0.37890625, + "reward_std": 0.27682238817214966, + "rewards/simpleverify_reward/mean": 0.37890625, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 855, + "tools/generated_tokens": 4045.69921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1299.5078125, + "completions/mean_terminated_length": 1108.7205810546875, + "completions/min_length": 177.0, + "completions/min_terminated_length": 177.0, + "entropy": 0.2706407178193331, + "epoch": 0.14586661554518926, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.23542420566082, + "learning_rate": 1e-06, + "loss": 0.0208, + "num_tokens": 367741014.0, + "reward": 0.45703125, + "reward_std": 0.30191951990127563, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 856, + "tools/generated_tokens": 5371.51953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.98828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1320.3828125, + "completions/mean_terminated_length": 1097.64794921875, + "completions/min_length": 329.0, + "completions/min_terminated_length": 329.0, + "entropy": 0.24583891034126282, + "epoch": 0.1460370204698916, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15358403325080872, + "learning_rate": 1e-06, + "loss": 0.0128, + "num_tokens": 368163304.0, + "reward": 0.3359375, + "reward_std": 0.19486366212368011, + "rewards/simpleverify_reward/mean": 0.3359375, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 857, + "tools/generated_tokens": 4944.37890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1308.16015625, + "completions/mean_terminated_length": 1175.2027587890625, + "completions/min_length": 103.0, + "completions/min_terminated_length": 103.0, + "entropy": 0.23788707703351974, + "epoch": 0.14620742539459392, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2616797387599945, + "learning_rate": 1e-06, + "loss": 0.0461, + "num_tokens": 368588721.0, + "reward": 0.56640625, + "reward_std": 0.2728821039199829, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 858, + "tools/generated_tokens": 4564.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1136.4140625, + "completions/mean_terminated_length": 1037.757568359375, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "entropy": 0.24072004668414593, + "epoch": 0.14637783031929621, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17764921486377716, + "learning_rate": 1e-06, + "loss": 0.0021, + "num_tokens": 368962491.0, + "reward": 0.6640625, + "reward_std": 0.24900493025779724, + "rewards/simpleverify_reward/mean": 0.6640625, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 859, + "tools/generated_tokens": 3832.41796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1249.99609375, + "completions/mean_terminated_length": 1115.1826171875, + "completions/min_length": 179.0, + "completions/min_terminated_length": 179.0, + "entropy": 0.2607467984780669, + "epoch": 0.14654823524399854, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19438406825065613, + "learning_rate": 1e-06, + "loss": 0.0155, + "num_tokens": 369364074.0, + "reward": 0.4921875, + "reward_std": 0.22457927465438843, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 860, + "tools/generated_tokens": 4490.01171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2017.0, + "completions/mean_length": 1199.83984375, + "completions/mean_terminated_length": 1014.0571899414062, + "completions/min_length": 111.0, + "completions/min_terminated_length": 111.0, + "entropy": 0.22630824986845255, + "epoch": 0.14671864016870087, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.20757856965065002, + "learning_rate": 1e-06, + "loss": 0.0283, + "num_tokens": 369762145.0, + "reward": 0.59765625, + "reward_std": 0.2621135711669922, + "rewards/simpleverify_reward/mean": 0.59765625, + "rewards/simpleverify_reward/std": 0.4913311004638672, + "step": 861, + "tools/generated_tokens": 4807.84375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1460.46875, + "completions/mean_terminated_length": 1295.9599609375, + "completions/min_length": 236.0, + "completions/min_terminated_length": 236.0, + "entropy": 0.2073358790948987, + "epoch": 0.1468890450934032, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.19643279910087585, + "learning_rate": 1e-06, + "loss": 0.0171, + "num_tokens": 370208905.0, + "reward": 0.5, + "reward_std": 0.19718992710113525, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 862, + "tools/generated_tokens": 4444.4765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.45703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1272.55859375, + "completions/mean_terminated_length": 1177.3333740234375, + "completions/min_length": 89.0, + "completions/min_terminated_length": 89.0, + "entropy": 0.23783125635236502, + "epoch": 0.14705945001810553, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.21768885850906372, + "learning_rate": 1e-06, + "loss": 0.0539, + "num_tokens": 370611256.0, + "reward": 0.65234375, + "reward_std": 0.3256661295890808, + "rewards/simpleverify_reward/mean": 0.65234375, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 863, + "tools/generated_tokens": 4288.56640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1246.03125, + "completions/mean_terminated_length": 1135.537841796875, + "completions/min_length": 241.0, + "completions/min_terminated_length": 241.0, + "entropy": 0.23799911607056856, + "epoch": 0.14722985494280785, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1385456621646881, + "learning_rate": 1e-06, + "loss": 0.0103, + "num_tokens": 371007232.0, + "reward": 0.58203125, + "reward_std": 0.14656277000904083, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 864, + "tools/generated_tokens": 4214.03515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1998.0, + "completions/mean_length": 1302.40625, + "completions/mean_terminated_length": 1134.7415771484375, + "completions/min_length": 97.0, + "completions/min_terminated_length": 97.0, + "entropy": 0.2315852651372552, + "epoch": 0.14740025986751018, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.23712506890296936, + "learning_rate": 1e-06, + "loss": 0.0337, + "num_tokens": 371425208.0, + "reward": 0.43359375, + "reward_std": 0.2718029022216797, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 865, + "tools/generated_tokens": 4678.4296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1190.7890625, + "completions/mean_terminated_length": 1022.5560302734375, + "completions/min_length": 213.0, + "completions/min_terminated_length": 213.0, + "entropy": 0.23144039418548346, + "epoch": 0.1475706647922125, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21178627014160156, + "learning_rate": 1e-06, + "loss": -0.0031, + "num_tokens": 371816770.0, + "reward": 0.390625, + "reward_std": 0.25879859924316406, + "rewards/simpleverify_reward/mean": 0.390625, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 866, + "tools/generated_tokens": 4334.7890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1192.51171875, + "completions/mean_terminated_length": 1078.9556884765625, + "completions/min_length": 152.0, + "completions/min_terminated_length": 152.0, + "entropy": 0.22740261629223824, + "epoch": 0.1477410697169148, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.19851236045360565, + "learning_rate": 1e-06, + "loss": 0.0209, + "num_tokens": 372204245.0, + "reward": 0.75390625, + "reward_std": 0.25119781494140625, + "rewards/simpleverify_reward/mean": 0.75390625, + "rewards/simpleverify_reward/std": 0.43157756328582764, + "step": 867, + "tools/generated_tokens": 4208.51953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1173.87109375, + "completions/mean_terminated_length": 1087.583740234375, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.21943960059434175, + "epoch": 0.14791147464161714, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.12678663432598114, + "learning_rate": 1e-06, + "loss": 0.025, + "num_tokens": 372576772.0, + "reward": 0.70703125, + "reward_std": 0.15920543670654297, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 868, + "tools/generated_tokens": 3549.8671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1251.59375, + "completions/mean_terminated_length": 1133.7489013671875, + "completions/min_length": 174.0, + "completions/min_terminated_length": 174.0, + "entropy": 0.21778283175081015, + "epoch": 0.14808187956631946, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.20096375048160553, + "learning_rate": 1e-06, + "loss": 0.0602, + "num_tokens": 372971404.0, + "reward": 0.5859375, + "reward_std": 0.2649868428707123, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 869, + "tools/generated_tokens": 3803.60546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1204.890625, + "completions/mean_terminated_length": 1062.4473876953125, + "completions/min_length": 140.0, + "completions/min_terminated_length": 140.0, + "entropy": 0.19960143137723207, + "epoch": 0.1482522844910218, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.22174742817878723, + "learning_rate": 1e-06, + "loss": -0.0078, + "num_tokens": 373357904.0, + "reward": 0.4375, + "reward_std": 0.23392276465892792, + "rewards/simpleverify_reward/mean": 0.4375, + "rewards/simpleverify_reward/std": 0.49705013632774353, + "step": 870, + "tools/generated_tokens": 4180.88671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1217.59765625, + "completions/mean_terminated_length": 1135.630859375, + "completions/min_length": 154.0, + "completions/min_terminated_length": 154.0, + "entropy": 0.21362486109137535, + "epoch": 0.14842268941572412, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.22009976208209991, + "learning_rate": 1e-06, + "loss": 0.0426, + "num_tokens": 373750297.0, + "reward": 0.6015625, + "reward_std": 0.2805894911289215, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 871, + "tools/generated_tokens": 4273.60546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1085.453125, + "completions/mean_terminated_length": 967.25, + "completions/min_length": 138.0, + "completions/min_terminated_length": 138.0, + "entropy": 0.2082717027515173, + "epoch": 0.14859309434042645, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19985249638557434, + "learning_rate": 1e-06, + "loss": 0.0332, + "num_tokens": 374110941.0, + "reward": 0.54296875, + "reward_std": 0.2113366276025772, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 872, + "tools/generated_tokens": 4093.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1409.828125, + "completions/mean_terminated_length": 1205.88134765625, + "completions/min_length": 180.0, + "completions/min_terminated_length": 180.0, + "entropy": 0.21090497635304928, + "epoch": 0.14876349926512877, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17528687417507172, + "learning_rate": 1e-06, + "loss": 0.0208, + "num_tokens": 374550833.0, + "reward": 0.33203125, + "reward_std": 0.1991586685180664, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 873, + "tools/generated_tokens": 4953.83203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.73046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1250.66796875, + "completions/mean_terminated_length": 1071.368408203125, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.1789833903312683, + "epoch": 0.14893390418983107, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.16894742846488953, + "learning_rate": 1e-06, + "loss": 0.0213, + "num_tokens": 374946844.0, + "reward": 0.625, + "reward_std": 0.15535868704319, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 874, + "tools/generated_tokens": 4274.67578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1344.4453125, + "completions/mean_terminated_length": 1271.6680908203125, + "completions/min_length": 22.0, + "completions/min_terminated_length": 22.0, + "entropy": 0.20491211488842964, + "epoch": 0.1491043091145334, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.23800332844257355, + "learning_rate": 1e-06, + "loss": 0.0262, + "num_tokens": 375364094.0, + "reward": 0.41796875, + "reward_std": 0.3040216565132141, + "rewards/simpleverify_reward/mean": 0.41796875, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 875, + "tools/generated_tokens": 4248.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2021.0, + "completions/mean_length": 1265.6796875, + "completions/mean_terminated_length": 1107.751220703125, + "completions/min_length": 179.0, + "completions/min_terminated_length": 179.0, + "entropy": 0.2280396344140172, + "epoch": 0.14927471403923573, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2741992175579071, + "learning_rate": 1e-06, + "loss": 0.0225, + "num_tokens": 375774604.0, + "reward": 0.64453125, + "reward_std": 0.25387370586395264, + "rewards/simpleverify_reward/mean": 0.64453125, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 876, + "tools/generated_tokens": 4601.6796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1310.984375, + "completions/mean_terminated_length": 1166.33642578125, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.18693785648792982, + "epoch": 0.14944511896393806, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.17376099526882172, + "learning_rate": 1e-06, + "loss": 0.0238, + "num_tokens": 376182008.0, + "reward": 0.53125, + "reward_std": 0.15056805312633514, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 877, + "tools/generated_tokens": 4078.98046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1252.6953125, + "completions/mean_terminated_length": 1109.7650146484375, + "completions/min_length": 190.0, + "completions/min_terminated_length": 190.0, + "entropy": 0.20417685620486736, + "epoch": 0.14961552388864038, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.23929616808891296, + "learning_rate": 1e-06, + "loss": 0.0107, + "num_tokens": 376577722.0, + "reward": 0.640625, + "reward_std": 0.2538875937461853, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 878, + "tools/generated_tokens": 4180.69921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2015.0, + "completions/mean_length": 1343.9296875, + "completions/mean_terminated_length": 1185.607666015625, + "completions/min_length": 137.0, + "completions/min_terminated_length": 137.0, + "entropy": 0.1986571168527007, + "epoch": 0.1497859288133427, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.17056000232696533, + "learning_rate": 1e-06, + "loss": 0.0141, + "num_tokens": 376988552.0, + "reward": 0.49609375, + "reward_std": 0.21545103192329407, + "rewards/simpleverify_reward/mean": 0.49609375, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 879, + "tools/generated_tokens": 4335.9375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1264.4453125, + "completions/mean_terminated_length": 1003.2604370117188, + "completions/min_length": 224.0, + "completions/min_terminated_length": 224.0, + "entropy": 0.23919691983610392, + "epoch": 0.14995633373804504, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.20202361047267914, + "learning_rate": 1e-06, + "loss": 0.0261, + "num_tokens": 377401130.0, + "reward": 0.5390625, + "reward_std": 0.19821478426456451, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 880, + "tools/generated_tokens": 4912.453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.78125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1375.703125, + "completions/mean_terminated_length": 1208.45849609375, + "completions/min_length": 168.0, + "completions/min_terminated_length": 168.0, + "entropy": 0.1874864399433136, + "epoch": 0.15012673866274737, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19059807062149048, + "learning_rate": 1e-06, + "loss": 0.0098, + "num_tokens": 377824862.0, + "reward": 0.546875, + "reward_std": 0.20356883108615875, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 881, + "tools/generated_tokens": 4351.70703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1278.25390625, + "completions/mean_terminated_length": 1067.6318359375, + "completions/min_length": 29.0, + "completions/min_terminated_length": 29.0, + "entropy": 0.16317385714501143, + "epoch": 0.15029714358744967, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1927434504032135, + "learning_rate": 1e-06, + "loss": 0.0203, + "num_tokens": 378230159.0, + "reward": 0.5546875, + "reward_std": 0.18301509320735931, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 882, + "tools/generated_tokens": 4422.26171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07421875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1092.3046875, + "completions/mean_terminated_length": 1015.687744140625, + "completions/min_length": 114.0, + "completions/min_terminated_length": 114.0, + "entropy": 0.23370731435716152, + "epoch": 0.150467548512152, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1970743089914322, + "learning_rate": 1e-06, + "loss": -0.0042, + "num_tokens": 378586701.0, + "reward": 0.66796875, + "reward_std": 0.15537451207637787, + "rewards/simpleverify_reward/mean": 0.66796875, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 883, + "tools/generated_tokens": 3844.30078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1140.59375, + "completions/mean_terminated_length": 1103.707275390625, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.2259423155337572, + "epoch": 0.15063795343685432, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.13444504141807556, + "learning_rate": 1e-06, + "loss": 0.0107, + "num_tokens": 378957957.0, + "reward": 0.30859375, + "reward_std": 0.11046826094388962, + "rewards/simpleverify_reward/mean": 0.30859375, + "rewards/simpleverify_reward/std": 0.46281787753105164, + "step": 884, + "tools/generated_tokens": 3348.6015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.30859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1404.51953125, + "completions/mean_terminated_length": 1117.31640625, + "completions/min_length": 147.0, + "completions/min_terminated_length": 147.0, + "entropy": 0.20274410769343376, + "epoch": 0.15080835836155665, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.20298629999160767, + "learning_rate": 1e-06, + "loss": 0.008, + "num_tokens": 379394634.0, + "reward": 0.39453125, + "reward_std": 0.27304062247276306, + "rewards/simpleverify_reward/mean": 0.39453125, + "rewards/simpleverify_reward/std": 0.48970720171928406, + "step": 885, + "tools/generated_tokens": 5220.515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.86328125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1136.1015625, + "completions/mean_terminated_length": 1010.4622192382812, + "completions/min_length": 104.0, + "completions/min_terminated_length": 104.0, + "entropy": 0.20719370152801275, + "epoch": 0.15097876328625898, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2245873510837555, + "learning_rate": 1e-06, + "loss": 0.0016, + "num_tokens": 379762052.0, + "reward": 0.625, + "reward_std": 0.22098566591739655, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 886, + "tools/generated_tokens": 3992.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.39453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1361.9453125, + "completions/mean_terminated_length": 1267.4222412109375, + "completions/min_length": 91.0, + "completions/min_terminated_length": 91.0, + "entropy": 0.19618909480050206, + "epoch": 0.1511491682109613, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18611346185207367, + "learning_rate": 1e-06, + "loss": 0.0204, + "num_tokens": 380180118.0, + "reward": 0.7265625, + "reward_std": 0.2028878629207611, + "rewards/simpleverify_reward/mean": 0.7265625, + "rewards/simpleverify_reward/std": 0.446596622467041, + "step": 887, + "tools/generated_tokens": 3873.94921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1240.75, + "completions/mean_terminated_length": 1073.2122802734375, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.2307307319715619, + "epoch": 0.15131957313566363, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.16559883952140808, + "learning_rate": 1e-06, + "loss": 0.0169, + "num_tokens": 380570262.0, + "reward": 0.6796875, + "reward_std": 0.1813678741455078, + "rewards/simpleverify_reward/mean": 0.6796875, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 888, + "tools/generated_tokens": 4008.765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1328.7890625, + "completions/mean_terminated_length": 1175.4171142578125, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.17856238782405853, + "epoch": 0.15148997806036593, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.19203080236911774, + "learning_rate": 1e-06, + "loss": 0.0436, + "num_tokens": 380984992.0, + "reward": 0.64453125, + "reward_std": 0.2495906949043274, + "rewards/simpleverify_reward/mean": 0.64453125, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 889, + "tools/generated_tokens": 4216.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1278.50390625, + "completions/mean_terminated_length": 1140.2120361328125, + "completions/min_length": 259.0, + "completions/min_terminated_length": 259.0, + "entropy": 0.2158465851098299, + "epoch": 0.15166038298506826, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18322642147541046, + "learning_rate": 1e-06, + "loss": 0.0167, + "num_tokens": 381391249.0, + "reward": 0.44921875, + "reward_std": 0.24208033084869385, + "rewards/simpleverify_reward/mean": 0.44921875, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 890, + "tools/generated_tokens": 4310.51171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.48046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1294.3984375, + "completions/mean_terminated_length": 1178.986572265625, + "completions/min_length": 8.0, + "completions/min_terminated_length": 8.0, + "entropy": 0.24783035833388567, + "epoch": 0.1518307879097706, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2206215113401413, + "learning_rate": 1e-06, + "loss": 0.002, + "num_tokens": 381794055.0, + "reward": 0.4453125, + "reward_std": 0.25406450033187866, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 891, + "tools/generated_tokens": 4190.3984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1200.45703125, + "completions/mean_terminated_length": 1048.1336669921875, + "completions/min_length": 87.0, + "completions/min_terminated_length": 87.0, + "entropy": 0.18117011338472366, + "epoch": 0.15200119283447291, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.22663454711437225, + "learning_rate": 1e-06, + "loss": 0.0287, + "num_tokens": 382189932.0, + "reward": 0.61328125, + "reward_std": 0.2720973491668701, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 892, + "tools/generated_tokens": 4168.45703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1351.14453125, + "completions/mean_terminated_length": 1147.0201416015625, + "completions/min_length": 173.0, + "completions/min_terminated_length": 173.0, + "entropy": 0.21165054757148027, + "epoch": 0.15217159775917524, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.20667289197444916, + "learning_rate": 1e-06, + "loss": 0.0094, + "num_tokens": 382622081.0, + "reward": 0.50390625, + "reward_std": 0.2092868983745575, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 893, + "tools/generated_tokens": 4871.15234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.71875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1301.109375, + "completions/mean_terminated_length": 1146.103759765625, + "completions/min_length": 199.0, + "completions/min_terminated_length": 199.0, + "entropy": 0.2058579958975315, + "epoch": 0.15234200268387757, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.232611745595932, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 383042045.0, + "reward": 0.53125, + "reward_std": 0.21808946132659912, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 894, + "tools/generated_tokens": 4501.11328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.5625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1296.546875, + "completions/mean_terminated_length": 1173.5908203125, + "completions/min_length": 145.0, + "completions/min_terminated_length": 145.0, + "entropy": 0.19320186413824558, + "epoch": 0.1525124076085799, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.21120324730873108, + "learning_rate": 1e-06, + "loss": 0.0315, + "num_tokens": 383450777.0, + "reward": 0.625, + "reward_std": 0.25999754667282104, + "rewards/simpleverify_reward/mean": 0.625, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 895, + "tools/generated_tokens": 4264.5625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1953125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1341.1640625, + "completions/mean_terminated_length": 1169.6068115234375, + "completions/min_length": 74.0, + "completions/min_terminated_length": 74.0, + "entropy": 0.1922745769843459, + "epoch": 0.15268281253328223, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1668403595685959, + "learning_rate": 1e-06, + "loss": 0.01, + "num_tokens": 383865779.0, + "reward": 0.53125, + "reward_std": 0.2108054757118225, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 896, + "tools/generated_tokens": 4181.1796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.38671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1342.609375, + "completions/mean_terminated_length": 1204.1822509765625, + "completions/min_length": 176.0, + "completions/min_terminated_length": 176.0, + "entropy": 0.25501332245767117, + "epoch": 0.15285321745798452, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.21012505888938904, + "learning_rate": 1e-06, + "loss": 0.0145, + "num_tokens": 384294639.0, + "reward": 0.578125, + "reward_std": 0.28183847665786743, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 897, + "tools/generated_tokens": 4694.625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.63671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1284.15234375, + "completions/mean_terminated_length": 1084.7388916015625, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.1856649974361062, + "epoch": 0.15302362238268685, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.18569427728652954, + "learning_rate": 1e-06, + "loss": 0.0005, + "num_tokens": 384701590.0, + "reward": 0.62109375, + "reward_std": 0.2376500368118286, + "rewards/simpleverify_reward/mean": 0.62109375, + "rewards/simpleverify_reward/std": 0.4860650300979614, + "step": 898, + "tools/generated_tokens": 4604.171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1340.22265625, + "completions/mean_terminated_length": 1189.284423828125, + "completions/min_length": 134.0, + "completions/min_terminated_length": 134.0, + "entropy": 0.19474520534276962, + "epoch": 0.15319402730738918, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1831023097038269, + "learning_rate": 1e-06, + "loss": 0.0066, + "num_tokens": 385131087.0, + "reward": 0.66015625, + "reward_std": 0.19540652632713318, + "rewards/simpleverify_reward/mean": 0.66015625, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 899, + "tools/generated_tokens": 4396.234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.27734375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1402.140625, + "completions/mean_terminated_length": 1154.2918701171875, + "completions/min_length": 181.0, + "completions/min_terminated_length": 181.0, + "entropy": 0.2115109683945775, + "epoch": 0.1533644322320915, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1206706091761589, + "learning_rate": 1e-06, + "loss": -0.0026, + "num_tokens": 385572579.0, + "reward": 0.34375, + "reward_std": 0.1441391110420227, + "rewards/simpleverify_reward/mean": 0.34375, + "rewards/simpleverify_reward/std": 0.47588926553726196, + "step": 900, + "tools/generated_tokens": 5066.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1246.53125, + "completions/mean_terminated_length": 1093.6976318359375, + "completions/min_length": 148.0, + "completions/min_terminated_length": 148.0, + "entropy": 0.23318169172853231, + "epoch": 0.15353483715679384, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.15183135867118835, + "learning_rate": 1e-06, + "loss": 0.0076, + "num_tokens": 385968043.0, + "reward": 0.3671875, + "reward_std": 0.12602485716342926, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 901, + "tools/generated_tokens": 4358.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.51953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2028.0, + "completions/mean_length": 1248.94921875, + "completions/mean_terminated_length": 1050.1658935546875, + "completions/min_length": 64.0, + "completions/min_terminated_length": 64.0, + "entropy": 0.23358885757625103, + "epoch": 0.15370524208149616, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2017795890569687, + "learning_rate": 1e-06, + "loss": 0.0204, + "num_tokens": 386380078.0, + "reward": 0.578125, + "reward_std": 0.2204269915819168, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 902, + "tools/generated_tokens": 4848.96484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1136.34375, + "completions/mean_terminated_length": 925.9663696289062, + "completions/min_length": 163.0, + "completions/min_terminated_length": 163.0, + "entropy": 0.19472294580191374, + "epoch": 0.1538756470061985, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21478179097175598, + "learning_rate": 1e-06, + "loss": -0.0075, + "num_tokens": 386757430.0, + "reward": 0.61328125, + "reward_std": 0.23600001633167267, + "rewards/simpleverify_reward/mean": 0.61328125, + "rewards/simpleverify_reward/std": 0.4879522919654846, + "step": 903, + "tools/generated_tokens": 4472.35546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.62890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1266.7890625, + "completions/mean_terminated_length": 1126.3870849609375, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.21280538849532604, + "epoch": 0.1540460519309008, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15427833795547485, + "learning_rate": 1e-06, + "loss": 0.006, + "num_tokens": 387160640.0, + "reward": 0.5859375, + "reward_std": 0.16581955552101135, + "rewards/simpleverify_reward/mean": 0.5859375, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 904, + "tools/generated_tokens": 4266.78125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.46484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1161.05078125, + "completions/mean_terminated_length": 1065.060546875, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "entropy": 0.21695744525641203, + "epoch": 0.15421645685560312, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.20708735287189484, + "learning_rate": 1e-06, + "loss": 0.01, + "num_tokens": 387528125.0, + "reward": 0.48828125, + "reward_std": 0.2473640739917755, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 905, + "tools/generated_tokens": 3993.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3828125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2029.0, + "completions/mean_length": 1177.70703125, + "completions/mean_terminated_length": 1070.8333740234375, + "completions/min_length": 139.0, + "completions/min_terminated_length": 139.0, + "entropy": 0.17662752140313387, + "epoch": 0.15438686178030545, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1443532109260559, + "learning_rate": 1e-06, + "loss": 0.0088, + "num_tokens": 387904866.0, + "reward": 0.57421875, + "reward_std": 0.21190981566905975, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 906, + "tools/generated_tokens": 3745.70703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2034.0, + "completions/mean_length": 1357.25, + "completions/mean_terminated_length": 1150.3958740234375, + "completions/min_length": 236.0, + "completions/min_terminated_length": 236.0, + "entropy": 0.21647846046835184, + "epoch": 0.15455726670500777, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.16806887090206146, + "learning_rate": 1e-06, + "loss": 0.0405, + "num_tokens": 388331746.0, + "reward": 0.35546875, + "reward_std": 0.22239765524864197, + "rewards/simpleverify_reward/mean": 0.35546875, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 907, + "tools/generated_tokens": 4861.2734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1234.98828125, + "completions/mean_terminated_length": 1097.630126953125, + "completions/min_length": 188.0, + "completions/min_terminated_length": 188.0, + "entropy": 0.19143771193921566, + "epoch": 0.1547276716297101, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2009657621383667, + "learning_rate": 1e-06, + "loss": 0.0132, + "num_tokens": 388730927.0, + "reward": 0.6640625, + "reward_std": 0.23140643537044525, + "rewards/simpleverify_reward/mean": 0.6640625, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 908, + "tools/generated_tokens": 4042.98828125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1170.640625, + "completions/mean_terminated_length": 1054.181396484375, + "completions/min_length": 163.0, + "completions/min_terminated_length": 163.0, + "entropy": 0.20012648031115532, + "epoch": 0.15489807655441243, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.23060695827007294, + "learning_rate": 1e-06, + "loss": 0.024, + "num_tokens": 389110627.0, + "reward": 0.5546875, + "reward_std": 0.25087815523147583, + "rewards/simpleverify_reward/mean": 0.5546875, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 909, + "tools/generated_tokens": 3882.66015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.32421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1316.2265625, + "completions/mean_terminated_length": 1188.669677734375, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.21804927103221416, + "epoch": 0.15506848147911476, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.18070954084396362, + "learning_rate": 1e-06, + "loss": 0.0337, + "num_tokens": 389524445.0, + "reward": 0.4921875, + "reward_std": 0.13503573834896088, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 910, + "tools/generated_tokens": 4292.2265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1330.4375, + "completions/mean_terminated_length": 1185.5821533203125, + "completions/min_length": 37.0, + "completions/min_terminated_length": 37.0, + "entropy": 0.18107119668275118, + "epoch": 0.15523888640381708, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.20166006684303284, + "learning_rate": 1e-06, + "loss": 0.0078, + "num_tokens": 389937549.0, + "reward": 0.70703125, + "reward_std": 0.140625, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 911, + "tools/generated_tokens": 4026.44140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.20703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1256.0625, + "completions/mean_terminated_length": 1049.305419921875, + "completions/min_length": 134.0, + "completions/min_terminated_length": 134.0, + "entropy": 0.20553940907120705, + "epoch": 0.15540929132851938, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.23448815941810608, + "learning_rate": 1e-06, + "loss": 0.0559, + "num_tokens": 390345181.0, + "reward": 0.59375, + "reward_std": 0.31379520893096924, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 912, + "tools/generated_tokens": 4744.07421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1273.60546875, + "completions/mean_terminated_length": 1130.2037353515625, + "completions/min_length": 56.0, + "completions/min_terminated_length": 56.0, + "entropy": 0.23516938649117947, + "epoch": 0.1555796962532217, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.31176838278770447, + "learning_rate": 1e-06, + "loss": 0.0179, + "num_tokens": 390757384.0, + "reward": 0.45703125, + "reward_std": 0.29775792360305786, + "rewards/simpleverify_reward/mean": 0.45703125, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 913, + "tools/generated_tokens": 4697.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1287.6796875, + "completions/mean_terminated_length": 1064.9595947265625, + "completions/min_length": 269.0, + "completions/min_terminated_length": 269.0, + "entropy": 0.18021881766617298, + "epoch": 0.15575010117792404, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1972075253725052, + "learning_rate": 1e-06, + "loss": 0.0231, + "num_tokens": 391178486.0, + "reward": 0.4453125, + "reward_std": 0.22765429317951202, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 914, + "tools/generated_tokens": 4479.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.55859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1197.62890625, + "completions/mean_terminated_length": 1093.2017822265625, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.17587225325405598, + "epoch": 0.15592050610262637, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2013789713382721, + "learning_rate": 1e-06, + "loss": 0.0405, + "num_tokens": 391564215.0, + "reward": 0.44140625, + "reward_std": 0.23513562977313995, + "rewards/simpleverify_reward/mean": 0.44140625, + "rewards/simpleverify_reward/std": 0.4975275993347168, + "step": 915, + "tools/generated_tokens": 3749.6328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.24609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1212.76953125, + "completions/mean_terminated_length": 1097.693359375, + "completions/min_length": 81.0, + "completions/min_terminated_length": 81.0, + "entropy": 0.20062597934156656, + "epoch": 0.1560909110273287, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.22711578011512756, + "learning_rate": 1e-06, + "loss": 0.0259, + "num_tokens": 391955548.0, + "reward": 0.3671875, + "reward_std": 0.2617396414279938, + "rewards/simpleverify_reward/mean": 0.3671875, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 916, + "tools/generated_tokens": 4452.76953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.58203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1143.77734375, + "completions/mean_terminated_length": 1028.2642822265625, + "completions/min_length": 184.0, + "completions/min_terminated_length": 184.0, + "entropy": 0.2103537656366825, + "epoch": 0.15626131595203102, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.3819078505039215, + "learning_rate": 1e-06, + "loss": 0.0275, + "num_tokens": 392318771.0, + "reward": 0.6015625, + "reward_std": 0.26345717906951904, + "rewards/simpleverify_reward/mean": 0.6015625, + "rewards/simpleverify_reward/std": 0.4905354380607605, + "step": 917, + "tools/generated_tokens": 3743.78515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.26953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1201.98828125, + "completions/mean_terminated_length": 1089.6903076171875, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "entropy": 0.2054003458470106, + "epoch": 0.15643172087673335, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21380577981472015, + "learning_rate": 1e-06, + "loss": -0.0105, + "num_tokens": 392700240.0, + "reward": 0.74609375, + "reward_std": 0.24856583774089813, + "rewards/simpleverify_reward/mean": 0.74609375, + "rewards/simpleverify_reward/std": 0.4360972046852112, + "step": 918, + "tools/generated_tokens": 3634.00390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1192.12890625, + "completions/mean_terminated_length": 1074.2088623046875, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.18549406621605158, + "epoch": 0.15660212580143565, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2368357628583908, + "learning_rate": 1e-06, + "loss": 0.0114, + "num_tokens": 393083217.0, + "reward": 0.42578125, + "reward_std": 0.22039085626602173, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 919, + "tools/generated_tokens": 4032.12890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.38671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.18359375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1145.1953125, + "completions/mean_terminated_length": 942.1722412109375, + "completions/min_length": 111.0, + "completions/min_terminated_length": 111.0, + "entropy": 0.2100257547572255, + "epoch": 0.15677253072613798, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18646378815174103, + "learning_rate": 1e-06, + "loss": 0.016, + "num_tokens": 393456195.0, + "reward": 0.4921875, + "reward_std": 0.20379294455051422, + "rewards/simpleverify_reward/mean": 0.4921875, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 920, + "tools/generated_tokens": 4545.19921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.66015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1257.6796875, + "completions/mean_terminated_length": 1115.6497802734375, + "completions/min_length": 76.0, + "completions/min_terminated_length": 76.0, + "entropy": 0.18473036121577024, + "epoch": 0.1569429356508403, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18240104615688324, + "learning_rate": 1e-06, + "loss": 0.0187, + "num_tokens": 393849057.0, + "reward": 0.59375, + "reward_std": 0.200038880109787, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 921, + "tools/generated_tokens": 3633.6953125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.16015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1241.90234375, + "completions/mean_terminated_length": 1126.75, + "completions/min_length": 139.0, + "completions/min_terminated_length": 139.0, + "entropy": 0.20337208593264222, + "epoch": 0.15711334057554263, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.244186669588089, + "learning_rate": 1e-06, + "loss": -0.0163, + "num_tokens": 394235896.0, + "reward": 0.640625, + "reward_std": 0.2629890441894531, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 922, + "tools/generated_tokens": 4041.91015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1259.125, + "completions/mean_terminated_length": 1113.0369873046875, + "completions/min_length": 163.0, + "completions/min_terminated_length": 163.0, + "entropy": 0.1947414893656969, + "epoch": 0.15728374550024496, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2345479279756546, + "learning_rate": 1e-06, + "loss": 0.0259, + "num_tokens": 394632216.0, + "reward": 0.609375, + "reward_std": 0.26596033573150635, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48884621262550354, + "step": 923, + "tools/generated_tokens": 4483.13671875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1324.84765625, + "completions/mean_terminated_length": 1210.3258056640625, + "completions/min_length": 144.0, + "completions/min_terminated_length": 144.0, + "entropy": 0.16528822854161263, + "epoch": 0.1574541504249473, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1458037942647934, + "learning_rate": 1e-06, + "loss": 0.0349, + "num_tokens": 395037009.0, + "reward": 0.50390625, + "reward_std": 0.20013156533241272, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 924, + "tools/generated_tokens": 3564.85546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.09375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1370.15234375, + "completions/mean_terminated_length": 1251.995361328125, + "completions/min_length": 124.0, + "completions/min_terminated_length": 124.0, + "entropy": 0.15608528349548578, + "epoch": 0.15762455534964961, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.18368294835090637, + "learning_rate": 1e-06, + "loss": -0.0132, + "num_tokens": 395445784.0, + "reward": 0.65625, + "reward_std": 0.15746080875396729, + "rewards/simpleverify_reward/mean": 0.65625, + "rewards/simpleverify_reward/std": 0.47588926553726196, + "step": 925, + "tools/generated_tokens": 3242.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.9140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.22265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1223.41015625, + "completions/mean_terminated_length": 987.2361450195312, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "entropy": 0.19397277850657701, + "epoch": 0.15779496027435194, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.19794364273548126, + "learning_rate": 1e-06, + "loss": 0.0025, + "num_tokens": 395846481.0, + "reward": 0.640625, + "reward_std": 0.21940405666828156, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 926, + "tools/generated_tokens": 4551.42578125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1133.734375, + "completions/mean_terminated_length": 1021.4649047851562, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.1861046152189374, + "epoch": 0.15796536519905424, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.1933472603559494, + "learning_rate": 1e-06, + "loss": 0.0618, + "num_tokens": 396219517.0, + "reward": 0.58203125, + "reward_std": 0.27970924973487854, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 927, + "tools/generated_tokens": 3845.75, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.32421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1169.3359375, + "completions/mean_terminated_length": 1052.7080078125, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.21599995903670788, + "epoch": 0.15813577012375657, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.22696195542812347, + "learning_rate": 1e-06, + "loss": 0.0578, + "num_tokens": 396604019.0, + "reward": 0.42578125, + "reward_std": 0.2724819481372833, + "rewards/simpleverify_reward/mean": 0.42578125, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 928, + "tools/generated_tokens": 4089.35546875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.42578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1162.99609375, + "completions/mean_terminated_length": 1058.650634765625, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "entropy": 0.1994457310065627, + "epoch": 0.1583061750484589, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.20095284283161163, + "learning_rate": 1e-06, + "loss": 0.0012, + "num_tokens": 396991666.0, + "reward": 0.73046875, + "reward_std": 0.31450045108795166, + "rewards/simpleverify_reward/mean": 0.73046875, + "rewards/simpleverify_reward/std": 0.44458550214767456, + "step": 929, + "tools/generated_tokens": 4106.99609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1176.62109375, + "completions/mean_terminated_length": 1082.3203125, + "completions/min_length": 66.0, + "completions/min_terminated_length": 66.0, + "entropy": 0.19552788324654102, + "epoch": 0.15847657997316122, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.25160905718803406, + "learning_rate": 1e-06, + "loss": 0.028, + "num_tokens": 397365649.0, + "reward": 0.70703125, + "reward_std": 0.2381068766117096, + "rewards/simpleverify_reward/mean": 0.70703125, + "rewards/simpleverify_reward/std": 0.45601576566696167, + "step": 930, + "tools/generated_tokens": 3832.6171875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1275.07421875, + "completions/mean_terminated_length": 1144.4931640625, + "completions/min_length": 35.0, + "completions/min_terminated_length": 35.0, + "entropy": 0.2244763569906354, + "epoch": 0.15864698489786355, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.233364075422287, + "learning_rate": 1e-06, + "loss": 0.0386, + "num_tokens": 397765892.0, + "reward": 0.359375, + "reward_std": 0.2717758119106293, + "rewards/simpleverify_reward/mean": 0.359375, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 931, + "tools/generated_tokens": 4499.0703125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.57421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2023.0, + "completions/mean_length": 1099.98828125, + "completions/mean_terminated_length": 1032.5606689453125, + "completions/min_length": 103.0, + "completions/min_terminated_length": 103.0, + "entropy": 0.19109783880412579, + "epoch": 0.15881738982256588, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.19230061769485474, + "learning_rate": 1e-06, + "loss": 0.0152, + "num_tokens": 398114241.0, + "reward": 0.69921875, + "reward_std": 0.20291273295879364, + "rewards/simpleverify_reward/mean": 0.69921875, + "rewards/simpleverify_reward/std": 0.45949608087539673, + "step": 932, + "tools/generated_tokens": 3227.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.0390625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1120.76953125, + "completions/mean_terminated_length": 1020.419921875, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "entropy": 0.2288867114111781, + "epoch": 0.1589877947472682, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.1770629733800888, + "learning_rate": 1e-06, + "loss": -0.0085, + "num_tokens": 398498438.0, + "reward": 0.5390625, + "reward_std": 0.1281953752040863, + "rewards/simpleverify_reward/mean": 0.5390625, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 933, + "tools/generated_tokens": 3984.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1165.0625, + "completions/mean_terminated_length": 1043.413330078125, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "entropy": 0.20659015513956547, + "epoch": 0.1591581996719705, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.17342914640903473, + "learning_rate": 1e-06, + "loss": 0.0243, + "num_tokens": 398861878.0, + "reward": 0.640625, + "reward_std": 0.1900683045387268, + "rewards/simpleverify_reward/mean": 0.640625, + "rewards/simpleverify_reward/std": 0.4807571768760681, + "step": 934, + "tools/generated_tokens": 3821.0625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23046875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1366.2421875, + "completions/mean_terminated_length": 1162.06591796875, + "completions/min_length": 52.0, + "completions/min_terminated_length": 52.0, + "entropy": 0.18999553378671408, + "epoch": 0.15932860459667283, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.17357800900936127, + "learning_rate": 1e-06, + "loss": 0.0062, + "num_tokens": 399286980.0, + "reward": 0.4609375, + "reward_std": 0.1857442855834961, + "rewards/simpleverify_reward/mean": 0.4609375, + "rewards/simpleverify_reward/std": 0.4994482398033142, + "step": 935, + "tools/generated_tokens": 4846.25, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.69921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1336.984375, + "completions/mean_terminated_length": 1172.90869140625, + "completions/min_length": 170.0, + "completions/min_terminated_length": 170.0, + "entropy": 0.1968188900500536, + "epoch": 0.15949900952137516, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.22350461781024933, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 399703600.0, + "reward": 0.6875, + "reward_std": 0.19047126173973083, + "rewards/simpleverify_reward/mean": 0.6875, + "rewards/simpleverify_reward/std": 0.4644203782081604, + "step": 936, + "tools/generated_tokens": 4032.9921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.31640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2037.0, + "completions/mean_length": 1305.95703125, + "completions/mean_terminated_length": 1121.3560791015625, + "completions/min_length": 83.0, + "completions/min_terminated_length": 83.0, + "entropy": 0.217020932585001, + "epoch": 0.1596694144460775, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.24502287805080414, + "learning_rate": 1e-06, + "loss": -0.014, + "num_tokens": 400126325.0, + "reward": 0.421875, + "reward_std": 0.33309003710746765, + "rewards/simpleverify_reward/mean": 0.421875, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 937, + "tools/generated_tokens": 4889.96484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.75, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1342.07421875, + "completions/mean_terminated_length": 1106.765625, + "completions/min_length": 156.0, + "completions/min_terminated_length": 156.0, + "entropy": 0.1982845589518547, + "epoch": 0.15983981937077982, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.187627911567688, + "learning_rate": 1e-06, + "loss": -0.0023, + "num_tokens": 400550472.0, + "reward": 0.4453125, + "reward_std": 0.17278027534484863, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 938, + "tools/generated_tokens": 4750.078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1264.84765625, + "completions/mean_terminated_length": 1164.810546875, + "completions/min_length": 110.0, + "completions/min_terminated_length": 110.0, + "entropy": 0.24043539352715015, + "epoch": 0.16001022429548215, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.24755193293094635, + "learning_rate": 1e-06, + "loss": 0.0113, + "num_tokens": 400954257.0, + "reward": 0.58203125, + "reward_std": 0.21994972229003906, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 939, + "tools/generated_tokens": 4320.859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.17578125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1276.05859375, + "completions/mean_terminated_length": 1111.4266357421875, + "completions/min_length": 50.0, + "completions/min_terminated_length": 50.0, + "entropy": 0.2400244725868106, + "epoch": 0.16018062922018447, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.24372680485248566, + "learning_rate": 1e-06, + "loss": 0.0201, + "num_tokens": 401363920.0, + "reward": 0.546875, + "reward_std": 0.13896197080612183, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 940, + "tools/generated_tokens": 4292.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.47265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1179.33203125, + "completions/mean_terminated_length": 1032.5753173828125, + "completions/min_length": 33.0, + "completions/min_terminated_length": 33.0, + "entropy": 0.21102703362703323, + "epoch": 0.1603510341448868, + "frac_reward_zero_std": 0.1875, + "grad_norm": 0.2658357620239258, + "learning_rate": 1e-06, + "loss": 0.0311, + "num_tokens": 401748581.0, + "reward": 0.54296875, + "reward_std": 0.32371947169303894, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 941, + "tools/generated_tokens": 4155.3515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.453125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1272.9140625, + "completions/mean_terminated_length": 1112.056640625, + "completions/min_length": 88.0, + "completions/min_terminated_length": 88.0, + "entropy": 0.22449059505015612, + "epoch": 0.1605214390695891, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.19246172904968262, + "learning_rate": 1e-06, + "loss": 0.0102, + "num_tokens": 402152543.0, + "reward": 0.4453125, + "reward_std": 0.20106375217437744, + "rewards/simpleverify_reward/mean": 0.4453125, + "rewards/simpleverify_reward/std": 0.49797385931015015, + "step": 942, + "tools/generated_tokens": 4584.91015625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.6171875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1186.09765625, + "completions/mean_terminated_length": 1054.09912109375, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "entropy": 0.21194147039204836, + "epoch": 0.16069184399429143, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2552310526371002, + "learning_rate": 1e-06, + "loss": -0.0058, + "num_tokens": 402538872.0, + "reward": 0.33203125, + "reward_std": 0.27799171209335327, + "rewards/simpleverify_reward/mean": 0.33203125, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 943, + "tools/generated_tokens": 4330.09765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.06640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2024.0, + "completions/mean_length": 1106.9609375, + "completions/mean_terminated_length": 1040.0250244140625, + "completions/min_length": 99.0, + "completions/min_terminated_length": 99.0, + "entropy": 0.19545185193419456, + "epoch": 0.16086224891899376, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.15534307062625885, + "learning_rate": 1e-06, + "loss": 0.0433, + "num_tokens": 402894190.0, + "reward": 0.546875, + "reward_std": 0.1898059844970703, + "rewards/simpleverify_reward/mean": 0.546875, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 944, + "tools/generated_tokens": 3722.96484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.27734375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2030.0, + "completions/mean_length": 1353.01171875, + "completions/mean_terminated_length": 1208.7783203125, + "completions/min_length": 109.0, + "completions/min_terminated_length": 109.0, + "entropy": 0.17680383892729878, + "epoch": 0.16103265384369608, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15160922706127167, + "learning_rate": 1e-06, + "loss": 0.0074, + "num_tokens": 403308593.0, + "reward": 0.51171875, + "reward_std": 0.17399311065673828, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 945, + "tools/generated_tokens": 4017.0234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.30078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1167.4921875, + "completions/mean_terminated_length": 1041.7054443359375, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.19234557263553143, + "epoch": 0.1612030587683984, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2004779428243637, + "learning_rate": 1e-06, + "loss": 0.0113, + "num_tokens": 403684095.0, + "reward": 0.6796875, + "reward_std": 0.21445102989673615, + "rewards/simpleverify_reward/mean": 0.6796875, + "rewards/simpleverify_reward/std": 0.4675106406211853, + "step": 946, + "tools/generated_tokens": 3695.4921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2032.0, + "completions/mean_length": 1355.140625, + "completions/mean_terminated_length": 1234.3760986328125, + "completions/min_length": 212.0, + "completions/min_terminated_length": 212.0, + "entropy": 0.19640544150024652, + "epoch": 0.16137346369310074, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.1715097427368164, + "learning_rate": 1e-06, + "loss": 0.0064, + "num_tokens": 404107219.0, + "reward": 0.43359375, + "reward_std": 0.2175418734550476, + "rewards/simpleverify_reward/mean": 0.43359375, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 947, + "tools/generated_tokens": 3915.15625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.25, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1080.94140625, + "completions/mean_terminated_length": 994.5233764648438, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.25844348035752773, + "epoch": 0.16154386861780307, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.288810670375824, + "learning_rate": 1e-06, + "loss": -0.008, + "num_tokens": 404473140.0, + "reward": 0.48828125, + "reward_std": 0.3043562173843384, + "rewards/simpleverify_reward/mean": 0.48828125, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 948, + "tools/generated_tokens": 4504.94140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1171875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1190.6171875, + "completions/mean_terminated_length": 1076.8096923828125, + "completions/min_length": 85.0, + "completions/min_terminated_length": 85.0, + "entropy": 0.20195814687758684, + "epoch": 0.16171427354250537, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.20876309275627136, + "learning_rate": 1e-06, + "loss": 0.0089, + "num_tokens": 404847442.0, + "reward": 0.57421875, + "reward_std": 0.2607692778110504, + "rewards/simpleverify_reward/mean": 0.57421875, + "rewards/simpleverify_reward/std": 0.49542948603630066, + "step": 949, + "tools/generated_tokens": 3862.625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3046875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1257.79296875, + "completions/mean_terminated_length": 1098.267578125, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "entropy": 0.21015969943255186, + "epoch": 0.1618846784672077, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2141178846359253, + "learning_rate": 1e-06, + "loss": 0.0167, + "num_tokens": 405259853.0, + "reward": 0.4140625, + "reward_std": 0.24502673745155334, + "rewards/simpleverify_reward/mean": 0.4140625, + "rewards/simpleverify_reward/std": 0.4935242533683777, + "step": 950, + "tools/generated_tokens": 4609.7890625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.63671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.11328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1153.875, + "completions/mean_terminated_length": 1039.6607666015625, + "completions/min_length": 70.0, + "completions/min_terminated_length": 70.0, + "entropy": 0.21854657400399446, + "epoch": 0.16205508339191002, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2705812454223633, + "learning_rate": 1e-06, + "loss": 0.0238, + "num_tokens": 405636029.0, + "reward": 0.59375, + "reward_std": 0.2981289029121399, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 951, + "tools/generated_tokens": 3657.8984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.22265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2038.0, + "completions/mean_length": 1120.4921875, + "completions/mean_terminated_length": 973.6018676757812, + "completions/min_length": 130.0, + "completions/min_terminated_length": 130.0, + "entropy": 0.21976852603256702, + "epoch": 0.16222548831661235, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.24892780184745789, + "learning_rate": 1e-06, + "loss": 0.0135, + "num_tokens": 406003275.0, + "reward": 0.59375, + "reward_std": 0.2539531886577606, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 952, + "tools/generated_tokens": 4160.48046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1041.95703125, + "completions/mean_terminated_length": 903.3555908203125, + "completions/min_length": 122.0, + "completions/min_terminated_length": 122.0, + "entropy": 0.2250966327264905, + "epoch": 0.16239589324131468, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2442435622215271, + "learning_rate": 1e-06, + "loss": 0.02, + "num_tokens": 406352480.0, + "reward": 0.58203125, + "reward_std": 0.255632221698761, + "rewards/simpleverify_reward/mean": 0.58203125, + "rewards/simpleverify_reward/std": 0.49419113993644714, + "step": 953, + "tools/generated_tokens": 4065.984375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1265.515625, + "completions/mean_terminated_length": 1180.83544921875, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "entropy": 0.19781662989407778, + "epoch": 0.162566298166017, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18892425298690796, + "learning_rate": 1e-06, + "loss": 0.0153, + "num_tokens": 406736564.0, + "reward": 0.47265625, + "reward_std": 0.22665932774543762, + "rewards/simpleverify_reward/mean": 0.47265625, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 954, + "tools/generated_tokens": 3449.5234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.06640625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1179.8125, + "completions/mean_terminated_length": 1051.33642578125, + "completions/min_length": 133.0, + "completions/min_terminated_length": 133.0, + "entropy": 0.2072059204801917, + "epoch": 0.16273670309071933, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.3045743405818939, + "learning_rate": 1e-06, + "loss": 0.0233, + "num_tokens": 407119156.0, + "reward": 0.484375, + "reward_std": 0.21433541178703308, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 955, + "tools/generated_tokens": 4283.8125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1281.796875, + "completions/mean_terminated_length": 1209.7607421875, + "completions/min_length": 35.0, + "completions/min_terminated_length": 35.0, + "entropy": 0.2013545837253332, + "epoch": 0.16290710801542166, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2171400785446167, + "learning_rate": 1e-06, + "loss": -0.0003, + "num_tokens": 407509248.0, + "reward": 0.7421875, + "reward_std": 0.16526088118553162, + "rewards/simpleverify_reward/mean": 0.7421875, + "rewards/simpleverify_reward/std": 0.4382871091365814, + "step": 956, + "tools/generated_tokens": 3489.80078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.078125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1328125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2008.0, + "completions/mean_length": 1150.7734375, + "completions/mean_terminated_length": 1013.369384765625, + "completions/min_length": 106.0, + "completions/min_terminated_length": 106.0, + "entropy": 0.21062565967440605, + "epoch": 0.16307751294012396, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.2558295428752899, + "learning_rate": 1e-06, + "loss": 0.0383, + "num_tokens": 407882486.0, + "reward": 0.578125, + "reward_std": 0.2771115005016327, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 957, + "tools/generated_tokens": 4174.77734375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4765625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0703125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1234.33203125, + "completions/mean_terminated_length": 1172.8026123046875, + "completions/min_length": 59.0, + "completions/min_terminated_length": 59.0, + "entropy": 0.1879670936614275, + "epoch": 0.1632479178648263, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.20825929939746857, + "learning_rate": 1e-06, + "loss": 0.0132, + "num_tokens": 408270203.0, + "reward": 0.51953125, + "reward_std": 0.1550418734550476, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 958, + "tools/generated_tokens": 3330.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.0234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2036.0, + "completions/mean_length": 1231.3203125, + "completions/mean_terminated_length": 1146.836181640625, + "completions/min_length": 108.0, + "completions/min_terminated_length": 108.0, + "entropy": 0.19753658398985863, + "epoch": 0.16341832278952861, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2578915059566498, + "learning_rate": 1e-06, + "loss": 0.0275, + "num_tokens": 408651037.0, + "reward": 0.54296875, + "reward_std": 0.20081061124801636, + "rewards/simpleverify_reward/mean": 0.54296875, + "rewards/simpleverify_reward/std": 0.4991260766983032, + "step": 959, + "tools/generated_tokens": 3703.3203125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.20703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1165.65234375, + "completions/mean_terminated_length": 1061.6287841796875, + "completions/min_length": 82.0, + "completions/min_terminated_length": 82.0, + "entropy": 0.1973144132643938, + "epoch": 0.16358872771423094, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.4418896436691284, + "learning_rate": 1e-06, + "loss": 0.0358, + "num_tokens": 409025428.0, + "reward": 0.76171875, + "reward_std": 0.22073253989219666, + "rewards/simpleverify_reward/mean": 0.76171875, + "rewards/simpleverify_reward/std": 0.4268665909767151, + "step": 960, + "tools/generated_tokens": 3741.6640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2578125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.23828125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1984.0, + "completions/mean_length": 1351.88671875, + "completions/mean_terminated_length": 1134.1334228515625, + "completions/min_length": 34.0, + "completions/min_terminated_length": 34.0, + "entropy": 0.2260230714455247, + "epoch": 0.16375913263893327, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.3573962450027466, + "learning_rate": 1e-06, + "loss": 0.0469, + "num_tokens": 409452327.0, + "reward": 0.5, + "reward_std": 0.25913122296333313, + "rewards/simpleverify_reward/mean": 0.5, + "rewards/simpleverify_reward/std": 0.5009794235229492, + "step": 961, + "tools/generated_tokens": 5143.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.8515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15234375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1308.61328125, + "completions/mean_terminated_length": 1175.741943359375, + "completions/min_length": 137.0, + "completions/min_terminated_length": 137.0, + "entropy": 0.20521524269133806, + "epoch": 0.1639295375636356, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21179236471652985, + "learning_rate": 1e-06, + "loss": 0.0267, + "num_tokens": 409858932.0, + "reward": 0.5078125, + "reward_std": 0.21785868704319, + "rewards/simpleverify_reward/mean": 0.5078125, + "rewards/simpleverify_reward/std": 0.5009182691574097, + "step": 962, + "tools/generated_tokens": 4276.640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.44921875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1232.58984375, + "completions/mean_terminated_length": 1094.8310546875, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "entropy": 0.1739743510261178, + "epoch": 0.16409994248833792, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21300075948238373, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 410257307.0, + "reward": 0.55078125, + "reward_std": 0.23030208051204681, + "rewards/simpleverify_reward/mean": 0.55078125, + "rewards/simpleverify_reward/std": 0.49838894605636597, + "step": 963, + "tools/generated_tokens": 4272.59765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2265625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1364.80859375, + "completions/mean_terminated_length": 1164.6817626953125, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.22997727058827877, + "epoch": 0.16427034741304022, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.21561212837696075, + "learning_rate": 1e-06, + "loss": 0.0133, + "num_tokens": 410691706.0, + "reward": 0.375, + "reward_std": 0.24502672255039215, + "rewards/simpleverify_reward/mean": 0.375, + "rewards/simpleverify_reward/std": 0.4850712716579437, + "step": 964, + "tools/generated_tokens": 4988.8046875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1168.1484375, + "completions/mean_terminated_length": 1037.9462890625, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "entropy": 0.1943550305441022, + "epoch": 0.16444075233774255, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.2026391625404358, + "learning_rate": 1e-06, + "loss": 0.0092, + "num_tokens": 411062496.0, + "reward": 0.50390625, + "reward_std": 0.209515780210495, + "rewards/simpleverify_reward/mean": 0.50390625, + "rewards/simpleverify_reward/std": 0.5009641647338867, + "step": 965, + "tools/generated_tokens": 3648.16796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1311.71484375, + "completions/mean_terminated_length": 1124.039306640625, + "completions/min_length": 55.0, + "completions/min_terminated_length": 55.0, + "entropy": 0.1801592716947198, + "epoch": 0.16461115726244488, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.18748624622821808, + "learning_rate": 1e-06, + "loss": 0.018, + "num_tokens": 411468439.0, + "reward": 0.64453125, + "reward_std": 0.23199693858623505, + "rewards/simpleverify_reward/mean": 0.64453125, + "rewards/simpleverify_reward/std": 0.4795927405357361, + "step": 966, + "tools/generated_tokens": 4183.72265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2040.0, + "completions/mean_length": 1228.61328125, + "completions/mean_terminated_length": 1085.7843017578125, + "completions/min_length": 27.0, + "completions/min_terminated_length": 27.0, + "entropy": 0.19476659875363111, + "epoch": 0.1647815621871472, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1987626701593399, + "learning_rate": 1e-06, + "loss": 0.0043, + "num_tokens": 411857396.0, + "reward": 0.52734375, + "reward_std": 0.20765095949172974, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 967, + "tools/generated_tokens": 4028.62109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.21484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1249.9609375, + "completions/mean_terminated_length": 1031.6019287109375, + "completions/min_length": 49.0, + "completions/min_terminated_length": 49.0, + "entropy": 0.19307413510978222, + "epoch": 0.16495196711184953, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2244318574666977, + "learning_rate": 1e-06, + "loss": 0.0038, + "num_tokens": 412257210.0, + "reward": 0.51953125, + "reward_std": 0.2428291141986847, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 968, + "tools/generated_tokens": 4417.97265625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1268.875, + "completions/mean_terminated_length": 1157.575927734375, + "completions/min_length": 24.0, + "completions/min_terminated_length": 24.0, + "entropy": 0.16396487969905138, + "epoch": 0.16512237203655186, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.1905089020729065, + "learning_rate": 1e-06, + "loss": 0.0368, + "num_tokens": 412649162.0, + "reward": 0.52734375, + "reward_std": 0.20970112085342407, + "rewards/simpleverify_reward/mean": 0.52734375, + "rewards/simpleverify_reward/std": 0.5002297759056091, + "step": 969, + "tools/generated_tokens": 3532.87109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.10546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2025.0, + "completions/mean_length": 1328.0703125, + "completions/mean_terminated_length": 1170.3857421875, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "entropy": 0.21298102103173733, + "epoch": 0.1652927769612542, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2038000077009201, + "learning_rate": 1e-06, + "loss": 0.0095, + "num_tokens": 413065244.0, + "reward": 0.56640625, + "reward_std": 0.21533125638961792, + "rewards/simpleverify_reward/mean": 0.56640625, + "rewards/simpleverify_reward/std": 0.4965413510799408, + "step": 970, + "tools/generated_tokens": 4264.078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.43359375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1234.1640625, + "completions/mean_terminated_length": 1069.8779296875, + "completions/min_length": 32.0, + "completions/min_terminated_length": 32.0, + "entropy": 0.19236394576728344, + "epoch": 0.16546318188595652, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.30821314454078674, + "learning_rate": 1e-06, + "loss": 0.0423, + "num_tokens": 413454998.0, + "reward": 0.63671875, + "reward_std": 0.24197597801685333, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 971, + "tools/generated_tokens": 4114.1796875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1080.46484375, + "completions/mean_terminated_length": 989.5000610351562, + "completions/min_length": 61.0, + "completions/min_terminated_length": 61.0, + "entropy": 0.18256896920502186, + "epoch": 0.16563358681065882, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1335376352071762, + "learning_rate": 1e-06, + "loss": 0.0069, + "num_tokens": 413805933.0, + "reward": 0.6484375, + "reward_std": 0.1290597915649414, + "rewards/simpleverify_reward/mean": 0.6484375, + "rewards/simpleverify_reward/std": 0.47839346528053284, + "step": 972, + "tools/generated_tokens": 3336.4609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1015625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1226.20703125, + "completions/mean_terminated_length": 1112.9822998046875, + "completions/min_length": 76.0, + "completions/min_terminated_length": 76.0, + "entropy": 0.1840164577588439, + "epoch": 0.16580399173536114, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2416093945503235, + "learning_rate": 1e-06, + "loss": 0.0301, + "num_tokens": 414188722.0, + "reward": 0.53125, + "reward_std": 0.21763455867767334, + "rewards/simpleverify_reward/mean": 0.53125, + "rewards/simpleverify_reward/std": 0.5, + "step": 973, + "tools/generated_tokens": 3690.21484375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.203125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1155.48828125, + "completions/mean_terminated_length": 1032.5244140625, + "completions/min_length": 118.0, + "completions/min_terminated_length": 118.0, + "entropy": 0.18110597226768732, + "epoch": 0.16597439666006347, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.20290687680244446, + "learning_rate": 1e-06, + "loss": 0.0092, + "num_tokens": 414564111.0, + "reward": 0.5234375, + "reward_std": 0.19332927465438843, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 974, + "tools/generated_tokens": 3979.50390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37890625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2043.0, + "completions/mean_length": 1180.51171875, + "completions/mean_terminated_length": 1029.302734375, + "completions/min_length": 130.0, + "completions/min_terminated_length": 130.0, + "entropy": 0.17978204507380724, + "epoch": 0.1661448015847658, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.28163033723831177, + "learning_rate": 1e-06, + "loss": 0.0058, + "num_tokens": 414940434.0, + "reward": 0.65234375, + "reward_std": 0.18606582283973694, + "rewards/simpleverify_reward/mean": 0.65234375, + "rewards/simpleverify_reward/std": 0.4771590530872345, + "step": 975, + "tools/generated_tokens": 4044.5234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.3984375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14453125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2039.0, + "completions/mean_length": 1200.0546875, + "completions/mean_terminated_length": 1056.799072265625, + "completions/min_length": 43.0, + "completions/min_terminated_length": 43.0, + "entropy": 0.22708263341337442, + "epoch": 0.16631520650946813, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.2369316816329956, + "learning_rate": 1e-06, + "loss": 0.0155, + "num_tokens": 415329808.0, + "reward": 0.32421875, + "reward_std": 0.14484524726867676, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 976, + "tools/generated_tokens": 4008.05859375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.37109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1239.33203125, + "completions/mean_terminated_length": 1127.9244384765625, + "completions/min_length": 16.0, + "completions/min_terminated_length": 16.0, + "entropy": 0.22214957047253847, + "epoch": 0.16648561143417046, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.2298741340637207, + "learning_rate": 1e-06, + "loss": -0.0313, + "num_tokens": 415739989.0, + "reward": 0.32421875, + "reward_std": 0.16318362951278687, + "rewards/simpleverify_reward/mean": 0.32421875, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 977, + "tools/generated_tokens": 4807.34375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.7421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1172.3515625, + "completions/mean_terminated_length": 1121.6982421875, + "completions/min_length": 58.0, + "completions/min_terminated_length": 58.0, + "entropy": 0.18435638770461082, + "epoch": 0.16665601635887278, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.25405094027519226, + "learning_rate": 1e-06, + "loss": 0.0251, + "num_tokens": 416113183.0, + "reward": 0.66796875, + "reward_std": 0.2175418734550476, + "rewards/simpleverify_reward/mean": 0.66796875, + "rewards/simpleverify_reward/std": 0.4718646705150604, + "step": 978, + "tools/generated_tokens": 3444.36328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.109375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1196.91796875, + "completions/mean_terminated_length": 1079.6622314453125, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "entropy": 0.19085302762687206, + "epoch": 0.16682642128357508, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.18096967041492462, + "learning_rate": 1e-06, + "loss": -0.0045, + "num_tokens": 416501482.0, + "reward": 0.66015625, + "reward_std": 0.14843884110450745, + "rewards/simpleverify_reward/mean": 0.66015625, + "rewards/simpleverify_reward/std": 0.47458380460739136, + "step": 979, + "tools/generated_tokens": 4188.9296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2035.0, + "completions/mean_length": 1293.6875, + "completions/mean_terminated_length": 1047.46630859375, + "completions/min_length": 81.0, + "completions/min_terminated_length": 81.0, + "entropy": 0.20483372081071138, + "epoch": 0.1669968262082774, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.19149084389209747, + "learning_rate": 1e-06, + "loss": -0.0025, + "num_tokens": 416925322.0, + "reward": 0.4296875, + "reward_std": 0.16531282663345337, + "rewards/simpleverify_reward/mean": 0.4296875, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 980, + "tools/generated_tokens": 4917.69921875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.76953125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1137.03125, + "completions/mean_terminated_length": 1011.5244750976562, + "completions/min_length": 28.0, + "completions/min_terminated_length": 28.0, + "entropy": 0.19713956397026777, + "epoch": 0.16716723113297974, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.2730614244937897, + "learning_rate": 1e-06, + "loss": 0.0402, + "num_tokens": 417294562.0, + "reward": 0.63671875, + "reward_std": 0.29895299673080444, + "rewards/simpleverify_reward/mean": 0.63671875, + "rewards/simpleverify_reward/std": 0.48188701272010803, + "step": 981, + "tools/generated_tokens": 4281.0390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.53515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1484375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2020.0, + "completions/mean_length": 1198.89453125, + "completions/mean_terminated_length": 1050.889892578125, + "completions/min_length": 37.0, + "completions/min_terminated_length": 37.0, + "entropy": 0.20358057040721178, + "epoch": 0.16733763605768207, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.18416868150234222, + "learning_rate": 1e-06, + "loss": 0.0248, + "num_tokens": 417681303.0, + "reward": 0.41015625, + "reward_std": 0.17484626173973083, + "rewards/simpleverify_reward/mean": 0.41015625, + "rewards/simpleverify_reward/std": 0.49282538890838623, + "step": 982, + "tools/generated_tokens": 4486.90234375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.60546875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.203125, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1989.0, + "completions/mean_length": 1394.984375, + "completions/mean_terminated_length": 1228.5343017578125, + "completions/min_length": 156.0, + "completions/min_terminated_length": 156.0, + "entropy": 0.19067569728940725, + "epoch": 0.1675080409823844, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1773720234632492, + "learning_rate": 1e-06, + "loss": 0.013, + "num_tokens": 418105539.0, + "reward": 0.5234375, + "reward_std": 0.13862934708595276, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 983, + "tools/generated_tokens": 4266.99609375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.40234375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.140625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1234.515625, + "completions/mean_terminated_length": 1101.41357421875, + "completions/min_length": 45.0, + "completions/min_terminated_length": 45.0, + "entropy": 0.19633601233363152, + "epoch": 0.16767844590708672, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.26622453331947327, + "learning_rate": 1e-06, + "loss": 0.0421, + "num_tokens": 418508039.0, + "reward": 0.453125, + "reward_std": 0.3073006868362427, + "rewards/simpleverify_reward/mean": 0.453125, + "rewards/simpleverify_reward/std": 0.4987730085849762, + "step": 984, + "tools/generated_tokens": 4722.53515625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.703125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.10546875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1246.8125, + "completions/mean_terminated_length": 1152.353759765625, + "completions/min_length": 86.0, + "completions/min_terminated_length": 86.0, + "entropy": 0.17976173013448715, + "epoch": 0.16784885083178905, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.20381921529769897, + "learning_rate": 1e-06, + "loss": 0.0085, + "num_tokens": 418899975.0, + "reward": 0.5234375, + "reward_std": 0.1424899697303772, + "rewards/simpleverify_reward/mean": 0.5234375, + "rewards/simpleverify_reward/std": 0.5004287362098694, + "step": 985, + "tools/generated_tokens": 3790.81640625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.2421875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.08984375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2031.0, + "completions/mean_length": 1116.67578125, + "completions/mean_terminated_length": 1024.742431640625, + "completions/min_length": 12.0, + "completions/min_terminated_length": 12.0, + "entropy": 0.2113470109179616, + "epoch": 0.16801925575649138, + "frac_reward_zero_std": 0.3125, + "grad_norm": 0.3020055592060089, + "learning_rate": 1e-06, + "loss": 0.0463, + "num_tokens": 419259636.0, + "reward": 0.5703125, + "reward_std": 0.3027361035346985, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 986, + "tools/generated_tokens": 3740.68359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28125, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1640625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1353.3828125, + "completions/mean_terminated_length": 1217.070068359375, + "completions/min_length": 67.0, + "completions/min_terminated_length": 67.0, + "entropy": 0.17643773183226585, + "epoch": 0.16818966068119368, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.278083860874176, + "learning_rate": 1e-06, + "loss": 0.0207, + "num_tokens": 419676006.0, + "reward": 0.53515625, + "reward_std": 0.3173474967479706, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 987, + "tools/generated_tokens": 4257.390625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.41796875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.16796875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 1987.0, + "completions/mean_length": 1175.90625, + "completions/mean_terminated_length": 999.8591918945312, + "completions/min_length": 80.0, + "completions/min_terminated_length": 80.0, + "entropy": 0.17600448476150632, + "epoch": 0.168360065605896, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.8599228262901306, + "learning_rate": 1e-06, + "loss": 0.0503, + "num_tokens": 420054462.0, + "reward": 0.68359375, + "reward_std": 0.21124649047851562, + "rewards/simpleverify_reward/mean": 0.68359375, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 988, + "tools/generated_tokens": 3823.93359375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.29296875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25390625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2047.0, + "completions/mean_length": 1444.875, + "completions/mean_terminated_length": 1239.623046875, + "completions/min_length": 68.0, + "completions/min_terminated_length": 68.0, + "entropy": 0.1784317335113883, + "epoch": 0.16853047053059833, + "frac_reward_zero_std": 0.6875, + "grad_norm": 0.15630988776683807, + "learning_rate": 1e-06, + "loss": 0.0231, + "num_tokens": 420491358.0, + "reward": 0.46875, + "reward_std": 0.13708871603012085, + "rewards/simpleverify_reward/mean": 0.46875, + "rewards/simpleverify_reward/std": 0.5, + "step": 989, + "tools/generated_tokens": 4388.875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.19921875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1280.92578125, + "completions/mean_terminated_length": 1090.1024169921875, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "entropy": 0.15464992634952068, + "epoch": 0.16870087545530066, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.15311479568481445, + "learning_rate": 1e-06, + "loss": 0.025, + "num_tokens": 420898923.0, + "reward": 0.484375, + "reward_std": 0.16736772656440735, + "rewards/simpleverify_reward/mean": 0.484375, + "rewards/simpleverify_reward/std": 0.5007347464561462, + "step": 990, + "tools/generated_tokens": 4176.9296875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.4140625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.24609375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2022.0, + "completions/mean_length": 1220.37890625, + "completions/mean_terminated_length": 950.2383422851562, + "completions/min_length": 40.0, + "completions/min_terminated_length": 40.0, + "entropy": 0.20445020589977503, + "epoch": 0.16887128038000299, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.21980640292167664, + "learning_rate": 1e-06, + "loss": 0.0123, + "num_tokens": 421295740.0, + "reward": 0.53515625, + "reward_std": 0.22017385065555573, + "rewards/simpleverify_reward/mean": 0.53515625, + "rewards/simpleverify_reward/std": 0.49973952770233154, + "step": 991, + "tools/generated_tokens": 4924.39453125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.80859375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2033.0, + "completions/mean_length": 1144.33203125, + "completions/mean_terminated_length": 1084.0875244140625, + "completions/min_length": 18.0, + "completions/min_terminated_length": 18.0, + "entropy": 0.15982358064502478, + "epoch": 0.1690416853047053, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.17256851494312286, + "learning_rate": 1e-06, + "loss": 0.006, + "num_tokens": 421657249.0, + "reward": 0.59375, + "reward_std": 0.15364307165145874, + "rewards/simpleverify_reward/mean": 0.59375, + "rewards/simpleverify_reward/std": 0.49209436774253845, + "step": 992, + "tools/generated_tokens": 3064.328125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 0.9375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.13671875, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2041.0, + "completions/mean_length": 1210.2109375, + "completions/mean_terminated_length": 1077.5294189453125, + "completions/min_length": 64.0, + "completions/min_terminated_length": 64.0, + "entropy": 0.15531280264258385, + "epoch": 0.16921209022940764, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.2234199047088623, + "learning_rate": 1e-06, + "loss": 0.0265, + "num_tokens": 422055767.0, + "reward": 0.51953125, + "reward_std": 0.23968850076198578, + "rewards/simpleverify_reward/mean": 0.51953125, + "rewards/simpleverify_reward/std": 0.5005971193313599, + "step": 993, + "tools/generated_tokens": 4250.2109375, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.484375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1015625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1132.078125, + "completions/mean_terminated_length": 1028.54345703125, + "completions/min_length": 65.0, + "completions/min_terminated_length": 65.0, + "entropy": 0.17533218674361706, + "epoch": 0.16938249515410994, + "frac_reward_zero_std": 0.375, + "grad_norm": 0.2538447678089142, + "learning_rate": 1e-06, + "loss": 0.0267, + "num_tokens": 422425643.0, + "reward": 0.68359375, + "reward_std": 0.23952803015708923, + "rewards/simpleverify_reward/mean": 0.68359375, + "rewards/simpleverify_reward/std": 0.4659844934940338, + "step": 994, + "tools/generated_tokens": 3884.078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.34375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2046.0, + "completions/mean_length": 1189.30859375, + "completions/mean_terminated_length": 1083.859619140625, + "completions/min_length": 78.0, + "completions/min_terminated_length": 78.0, + "entropy": 0.15691512124612927, + "epoch": 0.16955290007881227, + "frac_reward_zero_std": 0.4375, + "grad_norm": 0.27894335985183716, + "learning_rate": 1e-06, + "loss": 0.0241, + "num_tokens": 422801290.0, + "reward": 0.51171875, + "reward_std": 0.2060009390115738, + "rewards/simpleverify_reward/mean": 0.51171875, + "rewards/simpleverify_reward/std": 0.5008418560028076, + "step": 995, + "tools/generated_tokens": 3517.32421875, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.13671875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2109375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2044.0, + "completions/mean_length": 1229.9921875, + "completions/mean_terminated_length": 1011.3316650390625, + "completions/min_length": 80.0, + "completions/min_terminated_length": 80.0, + "entropy": 0.20765979401767254, + "epoch": 0.1697233050035146, + "frac_reward_zero_std": 0.5625, + "grad_norm": 0.2927990257740021, + "learning_rate": 1e-06, + "loss": 0.0113, + "num_tokens": 423197944.0, + "reward": 0.5703125, + "reward_std": 0.17539192736148834, + "rewards/simpleverify_reward/mean": 0.5703125, + "rewards/simpleverify_reward/std": 0.4960011839866638, + "step": 996, + "tools/generated_tokens": 4806.0078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.74609375, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0859375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2045.0, + "completions/mean_length": 1160.9375, + "completions/mean_terminated_length": 1077.53857421875, + "completions/min_length": 54.0, + "completions/min_terminated_length": 54.0, + "entropy": 0.16144005861133337, + "epoch": 0.16989370992821692, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.2151246964931488, + "learning_rate": 1e-06, + "loss": 0.0023, + "num_tokens": 423567784.0, + "reward": 0.6640625, + "reward_std": 0.14523236453533173, + "rewards/simpleverify_reward/mean": 0.6640625, + "rewards/simpleverify_reward/std": 0.4732423722743988, + "step": 997, + "tools/generated_tokens": 3656.94140625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.21875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09765625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2048.0, + "completions/mean_length": 1187.34765625, + "completions/mean_terminated_length": 1094.207763671875, + "completions/min_length": 53.0, + "completions/min_terminated_length": 53.0, + "entropy": 0.17552885971963406, + "epoch": 0.17006411485291925, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.20247352123260498, + "learning_rate": 1e-06, + "loss": 0.0279, + "num_tokens": 423946833.0, + "reward": 0.6328125, + "reward_std": 0.1361129879951477, + "rewards/simpleverify_reward/mean": 0.6328125, + "rewards/simpleverify_reward/std": 0.48298248648643494, + "step": 998, + "tools/generated_tokens": 3619.34765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.1875, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.12890625, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2042.0, + "completions/mean_length": 1233.8515625, + "completions/mean_terminated_length": 1113.3721923828125, + "completions/min_length": 38.0, + "completions/min_terminated_length": 38.0, + "entropy": 0.18420981895178556, + "epoch": 0.17023451977762158, + "frac_reward_zero_std": 0.625, + "grad_norm": 0.1994447112083435, + "learning_rate": 1e-06, + "loss": 0.012, + "num_tokens": 424343691.0, + "reward": 0.578125, + "reward_std": 0.14711037278175354, + "rewards/simpleverify_reward/mean": 0.578125, + "rewards/simpleverify_reward/std": 0.49482619762420654, + "step": 999, + "tools/generated_tokens": 3737.84765625, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.22265625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09375, + "completions/max_length": 2048.0, + "completions/max_terminated_length": 2027.0, + "completions/mean_length": 1153.54296875, + "completions/mean_terminated_length": 1061.012939453125, + "completions/min_length": 94.0, + "completions/min_terminated_length": 94.0, + "entropy": 0.17195395100861788, + "epoch": 0.1704049247023239, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.1732359230518341, + "learning_rate": 1e-06, + "loss": 0.0353, + "num_tokens": 424717334.0, + "reward": 0.67578125, + "reward_std": 0.16813471913337708, + "rewards/simpleverify_reward/mean": 0.67578125, + "rewards/simpleverify_reward/std": 0.46899911761283875, + "step": 1000, + "tools/generated_tokens": 3785.55078125, + "tools/num_python": 0.0, + "tools/num_python_exec_error": 0.0, + "tools/num_retrieval": 0.0, + "tools/num_retriever_exec_error": 0.0, + "tools/num_saving": 1.28515625, + "tools/num_saving_exec_error": 0.0, + "tools/num_saving_forced": 0.0, + "tools/num_saving_invalid_use": 0.0, + "tools/num_tool_detect_error": 0.0 + }, + { + "epoch": 0.1704049247023239, + "step": 1000, + "total_flos": 0.0, + "train_loss": 0.0017749488347908481, + "train_runtime": 14122.464, + "train_samples_per_second": 18.127, + "train_steps_per_second": 0.071 + } + ], + "logging_steps": 1, + "max_steps": 1000, + "num_input_tokens_seen": 424717334, + "num_train_epochs": 1, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}